pax_global_header00006660000000000000000000000064122105361340014507gustar00rootroot0000000000000052 comment=7020a6d48649a089af29dd5fd723cc1667ee890d netatop-0.3/000075500000000000000000000000001221053613400130215ustar00rootroot00000000000000netatop-0.3/ChangeLog000064400000000000000000000132551221053613400146010ustar00rootroot00000000000000commit f72c6ae6b7f3b2efbbfae97d25674e046f472d1f Author: Gerlof Langeveld Date: Fri Jul 19 08:43:12 2013 +0200 Add dependency of zlib-devel package. M README commit 8a0a7707c407e6e361b9f9475adc923f7d478c02 Author: Gerlof Langeveld Date: Fri Jul 19 08:42:29 2013 +0200 iUse install instead of cp command to avoid "text file busy". M Makefile commit 6a544ec9c0fd5054489f9549b34de447deac5513 Author: Gerlof Langeveld Date: Fri Jul 19 08:41:11 2013 +0200 Convert /proc interface to proc_create/seq_file. Credits: Gabriel de Perthuis M module/netatop.c commit a530702f4d6cf8607419aec671bc1cbf32e69dba Author: Gerlof Langeveld Date: Wed Dec 19 08:23:26 2012 +0100 Show module version in /proc/netatop. M module/netatop.c commit 1acce78c015e9825cb62761cd87f6ec5f4d6aba3 Author: Gerlof Langeveld Date: Wed Dec 19 08:12:19 2012 +0100 Use mutex instead of spinlock to protect the garbage collector from being called in parallel. M module/netatop.c commit 0d8b252e5345a2d11aa934e2a4635e377423e006 Author: Gerlof Langeveld Date: Mon Dec 17 15:32:49 2012 +0100 Addition of the kernel thread 'knetatopgc' to issue the garbage collection periodically. This solves a panic situation (mainly on virtual servers) where function find_vpid is called in the context of a thread that is just busy to exit. M module/netatop.c commit 3ae2b983eed4c3b2025593290382eb8bee2939e4 Author: Gerlof Langeveld Date: Tue Dec 4 14:05:50 2012 +0100 Optimize memory usage by creating slab caches for sockinfo and taskinfo structs, and by increasing the space allowed for sockinfo structs (appeared to be too tight for large servers). 
M module/netatop.c commit 92512f25ff88db0b1cbc9fbd7109e8655628b6bb Author: Gerlof Langeveld Date: Mon Nov 19 10:50:41 2012 +0100 Implemented separate functions for start and stop in init-script. M README M netatop.init commit e51ab1c074f8a27cca8cb8e40014283ca27f10ba Author: Gerlof Langeveld Date: Mon Nov 19 10:49:07 2012 +0100 Increased maximum space for taskinfo structs to 2MiB (was 1MiB) and increased counters in /proc/netatop from 9 till 12 positions. M module/netatop.c commit 38698454b44f873f8895cad17306d5ad288fb2dc Author: Gerlof Langeveld Date: Mon Nov 12 13:24:53 2012 +0100 Maintain an overflow counter for sockinfo and taskinfo structs that were omitted due to reaching their memory thresholds. M module/netatop.c D netatopversion.h commit ff821a1124451d205ef84c02d17cebd425df6a77 Author: Gerlof Langeveld Date: Sat Nov 10 16:29:17 2012 +0100 Separate spin-locks have been added for the global varibales nrs and nrt due to failed modifications on these variables (specifically nrs). M module/netatop.c M netatopversion.h commit a07903439c87c44d66aa3424610905c7e406c7e1 Author: Gerlof Langeveld Date: Fri Oct 26 10:34:37 2012 +0200 Added Changelog to archive. M mkdistr M netatopversion.h commit 02e360e54426229afc0c30a2d0b9f751ff934b05 Author: Gerlof Langeveld Date: Fri Oct 26 09:18:47 2012 +0200 Adapt for proper versioning. M mkdistr M netatopversion.h commit 5f849515d0e0fc66c9dfe2ab36abd482aa4b95f1 Author: Gerlof Langeveld Date: Thu Oct 25 23:21:18 2012 +0200 emoved automatic chkconfig. M Makefile commit daa8edfd29db9f7c3ed07b24b4c66085c8fdb9e7 Author: Gerlof Langeveld Date: Tue Oct 23 14:45:05 2012 +0200 Make it possible for atop to dynamically detect netatop(d) per sample. M Makefile M README M daemon/netatopd.c M mkdistr M module/netatop.c M netatop.h M netatopd.h M netatopversion.h commit a500af96306856e4d7dbb29e2ba387a3f592f7ba Author: Gerlof Langeveld Date: Mon Oct 22 20:52:13 2012 +0200 Make distribution ready and add man-pages. 
M Makefile A man/netatop.4 A man/netatopd.8 M mkdistr M netatopversion.h commit 3c6edf2395dd9b6386cb9f0002a79372a69903c4 Author: Gerlof Langeveld Date: Sun Oct 14 15:25:25 2012 +0200 Bug- and porting-solutions. M Makefile A README M daemon/Makefile M daemon/netatopd.c A mkdistr A mkversion M module/Makefile M module/netatop.c M netatop.init M netatopd.h A netatopversion.h commit 299f306e8d6fb43684fbc8c18cf6d0418ec4407c Author: root Date: Sat Sep 15 13:17:39 2012 +0200 Extra semaphore to indicate if netatopd itself is running. M daemon/netatopd.c commit 621e9fca7ca7b9f078ad8c2ceb302e63c2580952 Author: Gerlof Langeveld Date: Sat Sep 15 12:33:58 2012 +0200 Correction of start time of process (also taken nanoseconds into account). M module/netatop.c commit 2753b506254d8cc0fb22b67e27dd89ecaf9021db Author: Gerlof Langeveld Date: Fri Sep 14 23:24:20 2012 +0200 Introduction of daemon netatopd. Handling of new getsockopt commands. M Makefile A daemon/Makefile A daemon/netatopd.c A module/Makefile A module/netatop.c D netatop.c M netatop.h M netatop.init A netatopd.h commit ca4978d494489150f2af3af4f079330cdc1d7f1f Author: Gerlof Langeveld Date: Tue Sep 4 17:52:19 2012 +0200 Complete redesign and rename from atopnet to netatop. M Makefile D atopnet.c D atopnet.h A netatop.c A netatop.h A netatop.init commit 6109d31ecd30dbeaf730b7ca75b860e665a825ba Author: Gerlof Langeveld Date: Wed Aug 8 12:00:51 2012 +0200 Initial setup. 
A Makefile A atopnet.c A atopnet.h netatop-0.3/Makefile000064400000000000000000000007201221053613400144600ustar00rootroot00000000000000all: ./mkversion cd module; make cd daemon; make install: module/netatop.ko daemon/netatopd install -d /lib/modules/`uname -r`/extra install -m 0644 module/netatop.ko -t /lib/modules/`uname -r`/extra depmod install daemon/netatopd -t /usr/sbin install -T -m 0755 netatop.init /etc/init.d/netatop install man/netatop.4 -t /usr/share/man/man4 install man/netatopd.8 -t /usr/share/man/man8 clean: cd module; make clean cd daemon; make clean netatop-0.3/README000064400000000000000000000012271221053613400137030ustar00rootroot00000000000000To build the kernel module 'netatop' and the daemon 'netatopd': - install package "kernel-devel" (if not installed yet) - install package "zlib-devel" (if not installed yet) - run "make" to compile the kernel module and daemon - run "make install" to install the kernel module and daemon (as root) - run "service netatop start" to load the kernel module and start the daemon For automatic load of the 'netatop' module and startup of the 'netatopd' daemon after boot, use one of the following commands (depending of the distro): - chkconfig --add netatop OR - update-rc.d netatop defaults Gerlof Langeveld October 2012 Last update: July 2013 netatop-0.3/daemon/000075500000000000000000000000001221053613400142645ustar00rootroot00000000000000netatop-0.3/daemon/Makefile000064400000000000000000000004541221053613400157270ustar00rootroot00000000000000# Makefile for linking kernel modules MYMODULE = netatop KERNDIR = /lib/modules/$(shell uname -r)/build THISDIR = $(shell pwd) all: netatopd netatopd: netatopd.o Makefile $(CC) netatopd.o -o netatopd -lz clean: rm -f *.o netatopd netatopd.o: ../netatop.h ../netatopd.h ../netatopversion.h netatop-0.3/daemon/netatopd.c000064400000000000000000000203251221053613400162500ustar00rootroot00000000000000/* ** The netatopd daemon can be started optionally to read the counters ** of 
exited processes from the netatop module. The 'usual' state of this ** daemon is the getsockopt command NETATOP_GETCNT_EXIT. With this command ** netatopd blocks until an exited task is available on the exitlist of the ** netatop module. The obtained netpertask struct is written to a logfile ** (similar to the process accounting info provided by the base kernel ** itself) when there is at least 5% of free space in the filesystem of ** the logfile. ** ** The logfile contains a small header struct that contains, among other ** things, a sequence number indicating how many netpertask structs are ** currently logged. ** This header is mmapped and can be consulted by analysis processes that ** consult the logfile (e.g. atop). ** Behind the header, the netpertask structs can be found in compressed ** form. Every compressed netpertask struct is preceded by one byte ** specifying the size of the compressed netpertask struct. ** ** Before an analysis process starts using the logfile, it has to subscribe ** itself by decrementing the second semaphore of a semaphore group owned ** by netatopd. In this way netatopd knows how many analysis processes ** are using the logfile. When no processes use the logfile any more, ** netatopd removes the logfile and starts building a new one as soon as ** a new subscription of an analysis process is noticed. ** ---------------------------------------------------------------------- ** Copyright (C) 2012 Gerlof Langeveld (gerlof.langeveld@atoptool.nl) ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License version 2 as ** published by the Free Software Foundation. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../netatop.h" #include "../netatopd.h" #include "../netatopversion.h" #define EVER ;; #include #include static int histopen(struct naheader **); static void recstore(int, struct netpertask *, socklen_t); /* ** Semaphore-handling ** ** A semaphore-group with two semaphores is created. The first semaphore ** specifies the number of netatopd processes running (to be sure that only ** one daemon is active at the time) ) and the second reflects the number ** of processes using the log-file (inverted). ** This second semaphore is initialized at some high value and is ** decremented by every analysis process (like atop) that uses the log-file ** and incremented as soon as such analysis process stops again. */ #define SEMTOTAL 100 #define NUMCLIENTS (SEMTOTAL - semctl(semid, 1, GETVAL, 0)) int main(int argc, char *argv[]) { int i, netsock, histfd = -1, semid; struct netpertask npt; socklen_t len = sizeof npt; struct naheader *nap; struct sigaction sigact; void gethup(int); struct sembuf semincr = {0, +1, SEM_UNDO}; /* ** version number required? */ if (argc == 2 && *argv[1] == '-' && *(argv[1]+1) == 'v') { printf("%s - %s \n", NETATOPVERSION, NETATOPDATE); return 0; } /* ** verify if we are running with the right privileges */ if (geteuid() != 0) { fprintf(stderr, "Root privileges are needed!\n"); exit(1); } /* ** open socket to IP layer */ if ( (netsock = socket(PF_INET, SOCK_RAW, IPPROTO_RAW)) == -1) { perror("open raw socket"); exit(2); } /* ** create the semaphore group and initialize it; ** if it already exists, verify if a netatopd daemon ** is already running */ if ( (semid = semget(SEMAKEY, 0, 0)) >= 0) // exists? 
{ if ( semctl(semid, 0, GETVAL, 0) == 1) { fprintf(stderr, "Another netatopd is already running!"); exit(3); } } else { if ( (semid = semget(SEMAKEY, 2, 0600|IPC_CREAT|IPC_EXCL)) >= 0) { (void) semctl(semid, 0, SETVAL, 0); (void) semctl(semid, 1, SETVAL, SEMTOTAL); } else { perror("cannot create semaphore"); exit(3); } } /* ** daemonize this process */ if ( fork() ) exit(0); // implicitly switch to background setsid(); if ( fork() ) exit(0); for (i=0; i < 1024; i++) if (i != netsock) close(i); umask(022); chdir("/tmp"); /* ** open syslog interface for failure messages */ openlog("netatopd", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "version %s actived\n", NETATOPVERSION); /* ** raise semaphore to define a busy netatopd */ if ( semop(semid, &semincr, 1) == -1) { syslog(LOG_ERR, "cannot increment semaphore\n"); exit(3); } /* ** the daemon can be woken up from getsockopt by receiving ** the sighup signal to verify if there are no clients any more ** (truncate exitfile) */ memset(&sigact, 0, sizeof sigact); sigact.sa_handler = gethup; sigaction(SIGHUP, &sigact, (struct sigaction *)0); /* ** raise priority */ (void) nice(-39); /* ** open history file */ histfd = histopen(&nap); /* ** continuously obtain the info about exited processes */ for (EVER) { /* ** check if anybody is interested in the exitfile ** if not, close and truncate it */ if (NUMCLIENTS == 0 && nap->curseq != 0) { /* ** destroy and reopen history file */ munmap(nap, sizeof(struct naheader)); close(histfd); syslog(LOG_INFO, "reopen history file\n"); histfd = histopen(&nap); } /* ** get next exited process (call blocks till available) */ switch (getsockopt(netsock, SOL_IP, NETATOP_GETCNT_EXIT, &npt, &len)) { case 0: // skip if nobody is using it if (NUMCLIENTS == 0) continue; /* ** at least one listener is active, so ** store record for exited process */ recstore(histfd, &npt, len); /* ** increment sequence number in file header */ nap->curseq++; break; default: // getsockopt failed switch (errno) { // no 
netatop module loaded? case ENOPROTOOPT: sleep(10); continue; // signal received? case EINTR: continue; default: syslog(LOG_ERR, "getsockopt failed\n"); return 0; } } } return 0; } /* ** open history file */ static int histopen(struct naheader **nahp) { int fd; struct naheader nahdr = {MYMAGIC, 0, sizeof(struct naheader), sizeof(struct netpertask), getpid()}; /* ** remove the old file; this way atop can detect that a ** new file must be opened */ (void) unlink(NETEXITFILE); /* ** open new file */ if ( (fd = open(NETEXITFILE, O_RDWR|O_CREAT|O_TRUNC, 0644)) == -1) { syslog(LOG_ERR, "cannot open %s for write\n", NETEXITFILE); exit(3); } /* ** write new header and mmap */ if ( write(fd, &nahdr, sizeof nahdr) != sizeof nahdr) { syslog(LOG_ERR, "cannot write to %s\n", NETEXITFILE); exit(3); } *nahp = mmap((void *)0, sizeof *nahp, PROT_WRITE, MAP_SHARED, fd, 0); if (*nahp == (void *) -1) { syslog(LOG_ERR, "mmap of %s failed\n", NETEXITFILE); exit(3); } return fd; } static void recstore(int fd, struct netpertask *np, socklen_t len) { Byte compbuf[sizeof *np + 128]; unsigned long complen = sizeof compbuf -1; struct statvfs statvfs; int rv; /* ** check if the filesystem is not filled for more than 95% */ if ( fstatvfs(fd, &statvfs) != -1) { if (statvfs.f_bfree * 100 / statvfs.f_blocks < 5) { syslog(LOG_ERR, "Filesystem > 95%% full; " "write skipped\n"); return; } } /* ** filesystem space sufficient ** compress netpertask struct */ rv = compress(compbuf+1, &complen, (Byte *)np, (unsigned long)sizeof *np); switch (rv) { case Z_OK: case Z_STREAM_END: case Z_NEED_DICT: break; default: syslog(LOG_ERR, "compression failure\n"); exit(5); } compbuf[0] = (Byte)complen; /* ** write compressed netpertask struct, headed by one byte ** with the size of the compressed struct */ if ( write(fd, compbuf, complen+1) < complen) { syslog(LOG_ERR, "write failure\n"); exit(5); } } /* ** dummy handler for SIGHUP */ void gethup(int sig) { } 
netatop-0.3/man/000075500000000000000000000000001221053613400135745ustar00rootroot00000000000000netatop-0.3/man/netatop.4000064400000000000000000000117561221053613400153450ustar00rootroot00000000000000.TH NETATOP 4 "October 2012" "Linux" .SH NAME .B netatop - per-process network statistics gathering .SH SYNOPSIS modprobe netatop .SH DESCRIPTION This manual page documents the .I netatop kernel module. This module uses the netfilter interface to gather statistics about TCP and UDP traffic per task, on level of process (thread group) and individual thread. .PP Once the module is loaded via modprobe, it is called for every packet that is transmitted by a local process and every packet that is received from an interface. For each packet it tries to determine the related process and thread to maintain statistical counters about the number of packets transmitted/received and the number of bytes transmitted/received. Separate counters are maintained for TCP and UDP packets. It does not only view the packets that contain the user data but also the protocol related packets (like SYN, ACK, ... for the TCP protocol). .br Received packets can only be identified as related to a process after that process has transmitted at least one packet in the process' context. .PP .SH DETAILS When the module discovers a packet for a new connection quintuple (TCP) or for a new local UDP port, it creates a so-called sockinfo structure. As soon as the .I netatop module knows to what process or thread the sockinfo struct is related, a reference is made from the sockinfo struct to the taskinfo struct that represents the proces or thread within the module. However, the related task can only be determined when a packet is transmitted, i.e. the module is called in the context of the transmitting process. At such moment the tgid (process) and pid (thread) can be obtained from the process administration to be stored in the module's own taskinfo structs (one for the process, one for the thread). 
.br For the time that the sockinfo struct can not be related to a taskinfo struct (e.g. when only packets are received so far), counters are maintained temporarily in the sockinfo struct. After a related taskinfo struct has been discovered (i.e. the task transmits), counters will be maintained in the taskinfo struct itself. When packets are only .B received for a socket (e.g. another machine is sending UDP packets to the local machine) while the local task never responds, no match to a process can be made and the packets remain unidentified by the netatop module. At least one packet should have been sent by a local process to be able to identify packets for such process. .PP The module uses a garbage collector to clean up the unused sockinfo structs if connections do not exist any more (TCP) or have not been used for some time (TCP/UDP). Furthermore, the garbage collector checks if taskinfo structs still represent existing processes or threads. If not, the taskinfo struct is destroyed (in case of a thread) or it is moved to a separate list of finished processes (in case of a process). Analysis programs can read the taskinfo of such a finished process. When the taskinfo struct of a finished process is not read within 15 seconds, the taskinfo struct will be removed from the exitlist and destroyed. .br The garbage collector can be activated by issuing a special getsockopt call from an analysis program (e.g. atop). Apart from that, a time-based garbage collector activation is issued anyhow every 15 seconds. .PP .SH SUPPORTED IOCTLS .PP Programs can open an IP socket and use the getsockopt() system call to issue commands to this module. With this system call the following commands can be issued: .TP 4 .B NETATOP_GETCNT_TGID Obtain the current counters for a specific process (thread group) in a netpertask structure. When the required process does not exist, errno ESRCH is given. .PP .TP 4 .B NETATOP_GETCNT_PID Obtain the current counters for a specific thread in a netpertask structure. 
When the required thread does not exist, errno ESRCH is given. .PP .TP 4 .B NETATOP_GETCNT_EXIT Obtain the counters of an exited process. This command has to be issued within 15 seconds after a process has been declared 'finished' by the garbage collector. Whenever this command is issued while there is no exited process in the exitlist, the requesting process is blocked until an exited process is available. .PP .TP 4 .B NETATOP_FORCE_GC Activate the garbage collector of the .I netatop module to determine if sockinfo structs of old connections/ports can be destroyed and to determine if taskinfo structs of exited processes can be moved to the exitlist. .PP .TP 4 .B NETATOP_EMPTY_EXIT Block the calling process until the exitlist with the taskinfo structs of exited processes is empty. .PP .SH FILES .TP 4 .B /proc/netatop In this file, counters can be found that show the total number of packets sent/received and the number of packets that were unidentified (i.e. not accounted to a process/thread). .br Furthermore, counters can be found about the current number of sockinfo structs, active taskinfo structs and taskinfo structs of exited processes. .SH SEE ALSO .B netatopd(8), .B atop(1), .B atopsar(1), .B atoprc(5) .br .B http://www.atoptool.nl .SH AUTHOR Gerlof Langeveld (gerlof.langeveld@atoptool.nl) netatop-0.3/man/netatopd.8000064400000000000000000000036301221053613400155050ustar00rootroot00000000000000.TH NETATOPD 8 "October 2012" "Linux" .SH NAME .B netatopd - log network statistics of finished processes .SH DESCRIPTION This manual page documents the .I netatopd daemon. This daemon reads the counters of exited processes delivered by the .I netatop module and writes the counters to a logfile. .PP The 'usual' state of this daemon is the getsockopt() command NETATOP_GETCNT_EXIT. With this command, .I netatopd blocks until an exited task is available on the exitlist of the .I netatop module. 
The obtained netpertask struct that contains the counters of an exited process is written to a logfile (similar to the process accounting info provided by the base kernel itself). The daemon stops logging when there is less than 5% of free space left in the filesystem of the logfile. .PP The logfile starts with a small header struct that contains, among other things, a sequence number indicating how many netpertask structs are currently logged. This header is mmapped and can be consulted by analysis processes like .I atop that consult the logfile. Behind the header, the netpertask structs can be found in compressed form. Every compressed netpertask struct is preceded by one byte specifying the size of the compressed netpertask struct. .PP Before an analysis process starts using the logfile, it has to subscribe itself by decrementing a semaphore owned by the .I netatopd daemon. In this way, .I netatopd knows how many analysis processes are currently using the logfile. When no processes use the logfile any more, .I netatopd truncates the logfile and starts building a new one as soon as a new subscription of an analysis process is noticed. .PP .SH FILES .PP .TP 5 .B /var/run/netatop.log File in which .I netatopd writes the compressed accounting records about processes that have finished. .SH SEE ALSO .B netatop(4), .B atop(1), .B atopsar(1), .B atoprc(5) .br .B http://www.atoptool.nl .SH AUTHOR Gerlof Langeveld (gerlof.langeveld@atoptool.nl) netatop-0.3/mkversion000075500000000000000000000006611221053613400147670ustar00rootroot00000000000000#!/bin/bash # # Generate proper versioning include file # CURDATE=$(date +%Y\/%m\/%d\ %H:%M:%S) if [ -d .git ] # git versioning available? 
then CURVERS=$(git branch | grep '^\* ' | sed -e 's/[* ]//g') if [ "$CURVERS" = master ] then CURVERS="T$(date +%Y%m%d%H%M%S)" fi else exit 0 # do not modify fi echo "#define NETATOPVERSION \"$CURVERS\"" > netatopversion.h echo "#define NETATOPDATE \"$CURDATE\"" >> netatopversion.h netatop-0.3/module/000075500000000000000000000000001221053613400143065ustar00rootroot00000000000000netatop-0.3/module/Makefile000064400000000000000000000007331221053613400157510ustar00rootroot00000000000000# Makefile for linking kernel modules MYMODULE = netatop KERNDIR = /lib/modules/$(shell uname -r)/build THISDIR = $(shell pwd) obj-m := $(MYMODULE).o $(MYMODULE).ko: $(MYMODULE).c echo start the make make -C $(KERNDIR) M=$(THISDIR) modules clean: rm -f *.o *.ko rm -f .netatop* rm -f netatop.ko.unsigned netatop.mod.c rm -f Module.symvers rm -f modules.order rm -fr .tmp_versions netatop.o: ../netatop.h ../netatopversion.h netatop-0.3/module/netatop.c000064400000000000000000001245431221053613400161350ustar00rootroot00000000000000/* ** This module uses the netfilter interface to maintain statistics ** about the network traffic per task, on level of thread group ** and individual thread. ** ** General setup ** ------------- ** Once the module is active, it is called for every packet that is ** transmitted by a local process and every packet that is received ** from an interface. Not only the packets that contain the user data ** are passed but also the TCP related protocol packets (SYN, ACK, ...). ** ** When the module discovers a packet for a connection (TCP) or local ** port (UDP) that is new, it creates a sockinfo structure. As soon as ** possible the sockinfo struct will be connected to a taskinfo struct ** that represents the proces or thread that is related to the socket. ** However, the task can only be determined when a packet is transmitted, ** i.e. the module is called during system call handling in the context ** of the transmitting process. 
At that moment the tgid (process) and ** pid (thread) can be obtained from the process administration to ** be stored in the module's own taskinfo structs (one for the process, ** one for the thread). ** For the time that the sockinfo struct can not be related to a taskinfo ** struct (e.g. when only packets are received), counters are maintained ** temporarily in the sockinfo struct. As soon as a related taskinfo struct ** is discovered when the task transmits, counters will be maintained in ** the taskinfo struct itself. ** When packets are only received for a socket (e.g. another machine is ** sending UDP packets to the local machine) while the local task ** never responds, no match to a process can be made and the packets ** remain unidentified by the netatop module. At least one packet should ** have been sent by a local process to be able to match packets for such ** socket. ** In the file /proc/netatop counters can be found that show the total ** number of packets sent/received and how many of these packets were ** unidentified (i.e. not accounted to a process/thread). ** ** Garbage collection ** ------------------ ** The module uses a garbage collector to cleanup the unused sockinfo ** structs if connections do not exist any more (TCP) or have not been ** used for some time (TCP/UDP). ** Furthermore, the garbage collector checks if the taskinfo structs ** still represent existing processes or threads. If not, the taskinfo struct ** is destroyed (in case of a thread) or it is moved to a separate list of ** finished processes (in case of a process). Analysis programs can read ** the taskinfo of such finished process. When the taskinfo of a finished ** process is not read within 15 seconds, the taskinfo will be destroyed. ** ** A garbage collector cycle can be triggered by issueing a getsockopt ** call from an analysis program (e.g. atop). 
Apart from that, a time-based ** garbage collector cycle is issued anyhow every 15 seconds by the ** knetatopgc kernel thread. ** ** Interface with user mode ** ------------------------ ** Programs can open an IP socket and use the getsockopt() system call ** to issue commands to this module. With the command NETATOP_GETCNT_TGID ** the current counters can be obtained on process level (thread group) ** and with the command NETATOP_GETCNT_PID the counters on thread level. ** For both commands, the tgid/pid of the required thread (group) has to ** be passed. When the required thread (group) does not exist, an errno ** ESRCH is given. ** ** The command NETATOP_GETCNT_EXIT can be issued to obtain the counters of ** an exited process. As stated above, such command has to be issued ** within 15 seconds after a process has been declared 'finished' by ** the garbage collector. Whenever this command is issued and no exited ** process is in the exitlist, the requesting process is blocked until ** an exited process is available. ** ** The command NETATOP_FORCE_GC activates the garbage collector of the ** netatop module to determine if sockinfo's of old connections/ports ** can be destroyed and if taskinfo's of exited processes can be ** moved to the exitlist. ** ** The command NETATOP_EMPTY_EXIT can be issued to wait until the exitlist ** with the taskinfo's of exited processes is empty. ** ---------------------------------------------------------------------- ** Copyright (C) 2012 Gerlof Langeveld (gerlof.langeveld@atoptool.nl) ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License version 2 as ** published by the Free Software Foundation. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../netatop.h" #include "../netatopversion.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Gerlof Langeveld "); MODULE_DESCRIPTION("Per-task network statistics"); MODULE_VERSION(NETATOPVERSION); #define GCINTERVAL (HZ*15) // interval garbage collector (jiffies) #define GCMAXUDP (HZ*16) // max inactivity for UDP (jiffies) #define GCMAXTCP (HZ*1800) // max inactivity for TCP (jiffies) #define GCMAXUNREF (HZ*60) // max time without taskref (jiffies) #define SILIMIT (4096*1024) // maximum memory for sockinfo structs #define TILIMIT (2048*1024) // maximum memory for taskinfo structs #define NF_IP_PRE_ROUTING 0 #define NF_IP_LOCAL_IN 1 #define NF_IP_FORWARD 2 #define NF_IP_LOCAL_OUT 3 #define NF_IP_POST_ROUTING 4 /* ** struct that maintains statistics about the network ** traffic caused per thread or thread group */ struct chainer { void *next; void *prev; }; struct taskinfobucket; struct taskinfo { struct chainer ch; pid_t id; // tgid or pid char type; // 'g' (thread group) or // 't' (thread) unsigned char state; // see below char command[COMLEN]; unsigned long btime; // start time of process unsigned long long exittime; // time inserted in exitlist struct taskcount tc; }; static struct kmem_cache *ticache; // taskinfo cache // state values above #define CHECKED 1 // verified that task still exists #define INDELETE 2 // task exited but still in hash list #define FINISHED 3 // task on exit list /* ** hash tables to find a particular thread group or thread */ #define TBUCKS 1024 // must be multiple of 2! 
#define THASH(x, t) (((x)+t)&(TBUCKS-1)) struct taskinfobucket { struct chainer ch; spinlock_t lock; } thash[TBUCKS]; static unsigned long nrt; // current number of taskinfo allocated static unsigned long nrt_ovf; // no taskinfo allocated due to overflow static DEFINE_SPINLOCK(nrtlock); static struct taskinfo *exithead; // linked list of exited processes static struct taskinfo *exittail; static DEFINE_SPINLOCK(exitlock); static DECLARE_WAIT_QUEUE_HEAD(exitlist_filled); static DECLARE_WAIT_QUEUE_HEAD(exitlist_empty); static unsigned long nre; // current number of taskinfo on exitlist /* ** structs that uniquely identify a TCP connection (host endian format) */ struct tcpv4_ident { uint32_t laddr; /* local IP address */ uint32_t raddr; /* remote IP address */ uint16_t lport; /* local port number */ uint16_t rport; /* remote port number */ }; struct tcpv6_ident { struct in6_addr laddr; /* local IP address */ struct in6_addr raddr; /* remote IP address */ uint16_t lport; /* local port number */ uint16_t rport; /* remote port number */ }; /* ** struct to maintain the reference from a socket ** to a thread and thread-group */ struct sockinfo { struct chainer ch; unsigned char last_state; // last known state of socket uint8_t proto; // protocol union keydef { uint16_t udp; // UDP ident (only portnumber) struct tcpv4_ident tcp4; // TCP connection ident IPv4 struct tcpv6_ident tcp6; // TCP connection ident IPv6 } key; struct taskinfo *tgp; // ref to thread group struct taskinfo *thp; // ref to thread (or NULL) short tgh; // hash number of thread group short thh; // hash number of thread unsigned int sndpacks; // temporary counters in case unsigned int rcvpacks; // known yet unsigned long sndbytes; // no relation to process is unsigned long rcvbytes; unsigned long long lastact; // last updated (jiffies) }; static struct kmem_cache *sicache; // sockinfo cache /* ** hash table to find a socket reference */ #define SBUCKS 1024 // must be multiple of 2! 
/*
** hash functions for the sockinfo table
** -- TCP: remote address plus local and remote port number
** -- UDP: local port number
*/
#define	SHASHTCP4(x)	(((x).raddr+(x).lport+(x).rport)&(SBUCKS-1))
#define	SHASHUDP(x)	((x)&(SBUCKS-1))

struct {
	struct chainer	ch;
	spinlock_t	lock;
} shash[SBUCKS];

static unsigned long	nrs;	// current number sockinfo allocated
static unsigned long	nrs_ovf; // no sockinfo allocated due to overflow
static DEFINE_SPINLOCK(nrslock);

/*
** various static counters
*/
static unsigned long	icmpsndbytes;
static unsigned long	icmpsndpacks;
static unsigned long	icmprcvbytes;
static unsigned long	icmprcvpacks;

static unsigned long	tcpsndpacks;
static unsigned long	tcprcvpacks;

static unsigned long	udpsndpacks;
static unsigned long	udprcvpacks;

static unsigned long	unidentudpsndpacks;
static unsigned long	unidentudprcvpacks;

static unsigned long	unidenttcpsndpacks;
static unsigned long	unidenttcprcvpacks;

static unsigned long	unknownproto;

static DEFINE_MUTEX(gclock);
static unsigned long long	gclast;	// last garbage collection (jiffies)

static struct task_struct	*knetatop_task;

static struct timespec	boottime;

/*
** function prototypes
*/
static void	analyze_tcpv4_packet(struct sk_buff *,
			const struct net_device *, int, char,
			struct iphdr *, void *);

static void	analyze_udp_packet(struct sk_buff *,
			const struct net_device *, int, char,
			struct iphdr *, void *);

static int	sock2task(char, struct sockinfo *,
			struct taskinfo **, short *,
			struct sk_buff *, const struct net_device *,
			int, char);

static void	update_taskcounters(struct sk_buff *,
			const struct net_device *,
			struct taskinfo *, char);

static void	update_sockcounters(struct sk_buff *,
			const struct net_device *,
			struct sockinfo *, char);

static void	sock2task_sync(struct sk_buff *,
			struct sockinfo *, struct taskinfo *);

static void	register_unident(struct sockinfo *);

static int	calc_reallen(struct sk_buff *,
			const struct net_device *);

static void	get_tcpv4_ident(struct iphdr *, void *,
			char, union keydef *);

static struct sockinfo	*find_sockinfo(int, union keydef *, int, int);
static struct sockinfo	*make_sockinfo(int, union keydef *, int, int);

static void	wipesockinfo(void);
static void	wipetaskinfo(void);
static void	wipetaskexit(void);

static void	garbage_collector(void);
static void	gctaskexit(void);
static void	gcsockinfo(void);
static void	gctaskinfo(void);

static void	move_taskinfo(struct taskinfo *);
static void	delete_taskinfo(struct taskinfo *);
static void	delete_sockinfo(struct sockinfo *);

static struct taskinfo	*get_taskinfo(pid_t, char);

static int	getsockopt(struct sock *, int, void *, int *);

static int	netatop_open(struct inode *inode, struct file *file);

/*
** hook definitions
*/
static struct nf_hook_ops	hookin_ipv4;
static struct nf_hook_ops	hookout_ipv4;

/*
** getsockopt definitions for communication with user space
*/
static struct nf_sockopt_ops sockopts = {
	.pf		= PF_INET,
	.get_optmin	= NETATOP_BASE_CTL,
	.get_optmax	= NETATOP_BASE_CTL+6,
	.get		= getsockopt,
	.owner		= THIS_MODULE,
};

/*
** file operations of the /proc entry (single-record seq_file)
*/
static struct file_operations netatop_proc_fops = {
	.open		= netatop_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	.owner		= THIS_MODULE,
};

/*
** hook function to be called for every incoming local packet
**
** counts the packet per protocol and passes TCP and UDP packets to the
** analyze functions with direction 'i'; in_syscall is always passed as 0
** the packet itself is never modified or dropped
*/
static unsigned int
ipv4_hookin(unsigned int hooknum,
            struct sk_buff *skb,
            const struct net_device *in,
            const struct net_device *out,
            int (*okfn)(struct sk_buff *))
{
	struct iphdr	*iph;
	void		*trh;

	if (skb == NULL)	// useless socket buffer?
		return NF_ACCEPT;

	/*
	** get pointer to IP header and transport header
	** (transport header is calculated from the IP header length)
	*/
	iph = (struct iphdr *)skb_network_header(skb);
	trh = ((char *)iph + (iph->ihl * 4));

	/*
	** react on protocol number
	*/
	switch (iph->protocol) {
	   case IPPROTO_TCP:
		tcprcvpacks++;
		analyze_tcpv4_packet(skb, in, 0, 'i', iph, trh);
		break;

	   case IPPROTO_UDP:
		udprcvpacks++;
		analyze_udp_packet(skb, in, 0, 'i', iph, trh);
		break;

	   case IPPROTO_ICMP:
		icmprcvpacks++;
		// packet length plus interface header plus 4 bytes crc
		icmprcvbytes += skb->len + in->hard_header_len + 4;
		break;

	   default:
		unknownproto++;
	}

	// accept every packet after stats gathering
	return NF_ACCEPT;
}

/*
** hook function to be called for every outgoing local packet
**
** counts the packet per protocol and passes TCP and UDP packets to the
** analyze functions with direction 'o'
** the packet itself is never modified or dropped
*/
static unsigned int
ipv4_hookout(unsigned int hooknum,
             struct sk_buff *skb,
             const struct net_device *in,
             const struct net_device *out,
             int (*okfn)(struct sk_buff *))
{
	// when not in interrupt context, 'current' is used later on
	// to relate the socket to a task
	int		in_syscall = !in_interrupt();
	struct iphdr	*iph;
	void		*trh;

	if (skb == NULL)	// useless socket buffer?
		return NF_ACCEPT;

	/*
	** get pointer to IP header and transport header
	*/
	iph = (struct iphdr *)skb_network_header(skb);
	trh = skb_transport_header(skb);

	/*
	** react on protocol number
	*/
	switch (iph->protocol) {
	   case IPPROTO_TCP:
		tcpsndpacks++;
		analyze_tcpv4_packet(skb, out, in_syscall, 'o', iph, trh);
		break;

	   case IPPROTO_UDP:
		udpsndpacks++;
		analyze_udp_packet(skb, out, in_syscall, 'o', iph, trh);
		break;

	   case IPPROTO_ICMP:
		icmpsndpacks++;
		// packet length plus interface header plus 4 bytes crc
		icmpsndbytes += skb->len + out->hard_header_len + 4;
		break;

	   default:
		unknownproto++;
	}

	// accept every packet after stats gathering
	return NF_ACCEPT;
}

/*
** generic function (for input and output) to analyze the current
** TCP packet: find or create the sockinfo of the connection and
** account the packet to the related thread group and thread
*/
static void
analyze_tcpv4_packet(struct sk_buff *skb,
	const struct net_device *ndev,	// interface description
	int in_syscall,			// called during system call?
	char direction,			// incoming ('i') or outgoing ('o')
	struct iphdr *iph, void *trh)
{
	union keydef	key;
	struct sockinfo	*sip;
	int		bs;	// hash bucket for sockinfo
	unsigned long	sflags;

	/*
	** determine tcpv4_ident that identifies this TCP packet
	** and calculate hash bucket in sockinfo hash
	*/
	get_tcpv4_ident(iph, trh, direction, &key);

	/*
	** check if we have seen this tcpv4_ident before with a
	** corresponding thread and thread group
	*/
	bs = SHASHTCP4(key.tcp4);

	spin_lock_irqsave(&shash[bs].lock, sflags);

	if ( (sip = find_sockinfo(IPPROTO_TCP, &key,
					sizeof key.tcp4, bs)) == NULL) {
		// no sockinfo yet: create one
		if ( (sip = make_sockinfo(IPPROTO_TCP, &key,
					sizeof key.tcp4, bs)) == NULL) {
			// no sockinfo available: count as unidentified
			if (direction == 'i')
				unidenttcprcvpacks++;
			else
				unidenttcpsndpacks++;
			goto unlocks;
		}
	}

	// remember socket state: used by the garbage collector
	if (skb->sk)
		sip->last_state = skb->sk->sk_state;

	/*
	** if needed (re)connect the sockinfo to a taskinfo and update
	** the counters
	*/

	// connect to thread group and update
	if (sock2task('g', sip, &sip->tgp, &sip->tgh,
				skb, ndev, in_syscall, direction)) {
		// connect to thread and update
		(void) sock2task('t', sip, &sip->thp, &sip->thh,
				skb, ndev, in_syscall, direction);
	}

unlocks:
	spin_unlock_irqrestore(&shash[bs].lock, sflags);
}

/*
** generic function (for input and output) to analyze the current
** UDP packet: find or create the sockinfo of the local port and
** account the packet to the related thread group and thread
*/
static void
analyze_udp_packet(struct sk_buff *skb,
	const struct net_device *ndev,	// interface description
	int in_syscall,			// called during system call?
	char direction,			// incoming ('i') or outgoing ('o')
	struct iphdr *iph, void *trh)
{
	struct udphdr	*udph = (struct udphdr *)trh;
	// local port: destination of incoming, source of outgoing packet
	uint16_t	udplocal = (direction == 'i' ?
				ntohs(udph->dest) : ntohs(udph->source));
	int		bs;	// hash bucket for sockinfo

	union keydef	key;
	struct sockinfo	*sip;
	unsigned long	sflags;

	/*
	** check if we have seen this local UDP port before with a
	** corresponding thread and thread group
	*/
	key.udp = udplocal;
	bs = SHASHUDP(udplocal);

	spin_lock_irqsave(&shash[bs].lock, sflags);

	if ( (sip = find_sockinfo(IPPROTO_UDP, &key,
					sizeof key.udp, bs)) == NULL) {
		// no sockinfo yet: create one
		if ( (sip = make_sockinfo(IPPROTO_UDP, &key,
					sizeof key.udp, bs)) == NULL) {
			// no sockinfo available: count as unidentified
			if (direction == 'i')
				unidentudprcvpacks++;
			else
				unidentudpsndpacks++;
			goto unlocks;
		}
	}

	/*
	** if needed (re)connect the sockinfo to a taskinfo and update
	** the counters
	*/

	// connect to thread group and update
	if (sock2task('g', sip, &sip->tgp, &sip->tgh,
				skb, ndev, in_syscall, direction)) {
		// connect to thread and update
		(void) sock2task('t', sip, &sip->thp, &sip->thh,
				skb, ndev, in_syscall, direction);
	}

unlocks:
	spin_unlock_irqrestore(&shash[bs].lock, sflags);
}

/*
** connect the sockinfo to the correct taskinfo and update the counters
**
** idtype	'g' (thread group) or 't' (thread)
** tipp		reference in sockinfo to the taskinfo (tgp or thp)
** hash		hash number in sockinfo related to tipp (tgh or thh)
**
** returns 1 when the task counters have been updated and
** returns 0 when no taskinfo could be connected
** the bucket lock of the sockinfo must be held by the caller
*/
static int
sock2task(char idtype, struct sockinfo *sip, struct taskinfo **tipp,
	short *hash, struct sk_buff *skb, const struct net_device *ndev,
	int in_syscall, char direction)
{
	pid_t		curid;
	unsigned long	tflags;

	if (*tipp == NULL) {
		/*
		** no taskinfo connected yet for this reference from
		** sockinfo; to connect to a taskinfo, we must
		** be in system call handling now --> verify
		*/
		if (!in_syscall) {
			// count packet temporarily in sockinfo (only once)
			if (idtype == 'g')
				update_sockcounters(skb, ndev, sip, direction);

			return 0;	// failed
		}

		/*
		** try to find existing taskinfo or create new taskinfo
		*/
		curid = (idtype == 'g' ? current->tgid : current->pid);

		*hash = THASH(curid, idtype);	// calc hashQ

		spin_lock_irqsave(&thash[*hash].lock, tflags);

		if ( (*tipp = get_taskinfo(curid, idtype)) == NULL) {
			/*
			** not possible to connect
			*/
			spin_unlock_irqrestore(&thash[*hash].lock, tflags);

			if (idtype == 'g')
				update_sockcounters(skb, ndev, sip, direction);

			return 0;	// failed
		}

		/*
		** new connection made:
		** update task counters with sock counters
		*/
		sock2task_sync(skb, sip, *tipp);
	} else {
		/*
		** already related to thread group or thread
		** lock existing task
		*/
		spin_lock_irqsave(&thash[*hash].lock, tflags);

		/*
		** check if socket has been passed to another process in the
		** meantime, as programs like xinetd tend to do
		** if so, connect sockinfo to the new task
		*/
		if (in_syscall) {
			curid = (idtype == 'g' ? current->tgid : current->pid);

			if ((*tipp)->id != curid) {
				// switch to the bucket lock of the new task
				spin_unlock_irqrestore(&thash[*hash].lock,
								tflags);
				*hash = THASH(curid, idtype);

				spin_lock_irqsave(&thash[*hash].lock, tflags);

				if ( (*tipp = get_taskinfo(curid, idtype))
								== NULL) {
					spin_unlock_irqrestore(
						&thash[*hash].lock, tflags);
					return 0;
				}
			}
		}
	}

	update_taskcounters(skb, ndev, *tipp, direction);

	spin_unlock_irqrestore(&thash[*hash].lock, tflags);

	return 1;
}

/*
** update the statistics of a particular thread group or thread
** with the current packet (only TCP and UDP packets are counted)
*/
static void
update_taskcounters(struct sk_buff *skb, const struct net_device *ndev,
			struct taskinfo *tip, char direction)
{
	struct iphdr	*iph = (struct iphdr *)skb_network_header(skb);
	int		reallen = calc_reallen(skb, ndev);

	switch (iph->protocol) {
	   case IPPROTO_TCP:
		if (direction == 'i') {
			tip->tc.tcprcvpacks++;
			tip->tc.tcprcvbytes += reallen;
		} else {
			tip->tc.tcpsndpacks++;
			tip->tc.tcpsndbytes += reallen;
		}
		break;

	   case IPPROTO_UDP:
		if (direction == 'i') {
			tip->tc.udprcvpacks++;
			tip->tc.udprcvbytes += reallen;
		} else {
			tip->tc.udpsndpacks++;
			tip->tc.udpsndbytes += reallen;
		}
	}
}

/*
** update the statistics of a sockinfo without a connected task
*/
static void
update_sockcounters(struct sk_buff *skb, const struct net_device *ndev,
			struct sockinfo *sip, char direction)
{
	int	reallen = calc_reallen(skb, ndev);

	if (direction == 'i') {
		sip->rcvpacks++;
		sip->rcvbytes += reallen;
	} else {
		sip->sndpacks++;
		sip->sndbytes += reallen;
	}
}

/*
** add the temporary counters in the sockinfo to the new connected task
** (the protocol is taken from the IP header of the current packet)
*/
static void
sock2task_sync(struct sk_buff *skb, struct sockinfo *sip, struct taskinfo *tip)
{
	struct iphdr	*iph = (struct iphdr *)skb_network_header(skb);

	switch (iph->protocol) {
	   case IPPROTO_TCP:
		tip->tc.tcprcvpacks += sip->rcvpacks;
		tip->tc.tcprcvbytes += sip->rcvbytes;
		tip->tc.tcpsndpacks += sip->sndpacks;
		tip->tc.tcpsndbytes += sip->sndbytes;
		break;

	   case IPPROTO_UDP:
		tip->tc.udprcvpacks += sip->rcvpacks;
		tip->tc.udprcvbytes += sip->rcvbytes;
		tip->tc.udpsndpacks += sip->sndpacks;
		tip->tc.udpsndbytes += sip->sndbytes;
	}
}

/*
** add the temporary packet counters of a sockinfo
** to the global counters for unidentified packets
*/
static void
register_unident(struct sockinfo *sip)
{
	switch (sip->proto) {
	   case IPPROTO_TCP:
		unidenttcprcvpacks += sip->rcvpacks;
		unidenttcpsndpacks += sip->sndpacks;
		break;

	   case IPPROTO_UDP:
		unidentudprcvpacks += sip->rcvpacks;
		unidentudpsndpacks += sip->sndpacks;
	}
}

/*
** calculate the number of bytes that are really sent or received
*/
static int
calc_reallen(struct sk_buff *skb, const struct net_device *ndev)
{
	/*
	** calculate the real load of this packet on the network:
	**
	** - length of IP header, TCP/UDP header and data (skb->len)
	**
	**   since packet assembly/disassembly is done by the IP layer
	**   (we get an input packet that has been assembled already and
	**   an output packet that still has to be assembled), an additional
	**   IP header and interface header (plus crc) have to be
	**   calculated per mtu for packets that are larger than the mtu
	**
	** - interface header length + 4 bytes crc
	*/
	int reallen = skb->len;

	if (reallen > ndev->mtu)
		reallen += (reallen / ndev->mtu) *
			(sizeof(struct iphdr) + ndev->hard_header_len + 4);

	reallen += ndev->hard_header_len + 4;

	return reallen;
}

/*
** find the tcpv4_ident for the current packet, represented by
** the skb_buff */ static void get_tcpv4_ident(struct iphdr *iph, void *trh, char direction, union keydef *key) { struct tcphdr *tcph = (struct tcphdr *)trh; memset(key, 0, sizeof *key); // important for memcmp later on /* ** determine local/remote IP address and ** determine local/remote port number */ switch (direction) { case 'i': // incoming packet key->tcp4.laddr = ntohl(iph->daddr); key->tcp4.raddr = ntohl(iph->saddr); key->tcp4.lport = ntohs(tcph->dest); key->tcp4.rport = ntohs(tcph->source); break; case 'o': // outgoing packet key->tcp4.laddr = ntohl(iph->saddr); key->tcp4.raddr = ntohl(iph->daddr); key->tcp4.lport = ntohs(tcph->source); key->tcp4.rport = ntohs(tcph->dest); } } /* ** search for the sockinfo holding the given address info ** the appropriate hash bucket must have been locked before calling */ static struct sockinfo * find_sockinfo(int proto, union keydef *identp, int identsz, int hash) { struct sockinfo *sip = shash[hash].ch.next; /* ** search for appropriate struct */ while (sip != (void *)&shash[hash].ch) { if ( memcmp(&sip->key, identp, identsz) == 0 && sip->proto == proto) { sip->lastact = jiffies_64; return sip; } sip = sip->ch.next; } return NULL; // not existing } /* ** create a new sockinfo and fill ** the appropriate hash bucket must have been locked before calling */ static struct sockinfo * make_sockinfo(int proto, union keydef *identp, int identsz, int hash) { struct sockinfo *sip; unsigned long flags; /* ** check if the threshold of memory used for sockinfo structs ** is reached to avoid that a fork bomb of processes opening ** a socket leads to memory overload */ if ( (nrs+1) * sizeof(struct sockinfo) > SILIMIT) { spin_lock_irqsave(&nrslock, flags); nrs_ovf++; spin_unlock_irqrestore(&nrslock, flags); return NULL; } if ( (sip = kmem_cache_alloc(sicache, GFP_ATOMIC)) == NULL) return NULL; spin_lock_irqsave(&nrslock, flags); nrs++; spin_unlock_irqrestore(&nrslock, flags); /* ** insert new struct in doubly linked list */ memset(sip, 
'\0', sizeof *sip); sip->ch.next = &shash[hash].ch; sip->ch.prev = shash[hash].ch.prev; ((struct sockinfo *)shash[hash].ch.prev)->ch.next = sip; shash[hash].ch.prev = sip; sip->proto = proto; sip->lastact = jiffies_64; sip->key = *identp; return sip; } /* ** search the taskinfo structure holding the info about the given id/type ** if such taskinfo is not yet present, create a new one */ static struct taskinfo * get_taskinfo(pid_t id, char type) { int bt = THASH(id, type); struct taskinfo *tip = thash[bt].ch.next; unsigned long tflags; /* ** search if id exists already */ while (tip != (void *)&thash[bt].ch) { if (tip->id == id && tip->type == type) return tip; tip = tip->ch.next; } /* ** check if the threshold of memory used for taskinfo structs ** is reached to avoid that a fork bomb of processes opening ** a socket lead to memory overload */ if ( (nre+nrt+1) * sizeof(struct taskinfo) > TILIMIT) { spin_lock_irqsave(&nrtlock, tflags); nrt_ovf++; spin_unlock_irqrestore(&nrtlock, tflags); return NULL; } /* ** id not known yet ** add new entry to hash list */ if ( (tip = kmem_cache_alloc(ticache, GFP_ATOMIC)) == NULL) return NULL; spin_lock_irqsave(&nrtlock, tflags); nrt++; spin_unlock_irqrestore(&nrtlock, tflags); /* ** insert new struct in doubly linked list ** and fill values */ memset(tip, '\0', sizeof *tip); tip->ch.next = &thash[bt].ch; tip->ch.prev = thash[bt].ch.prev; ((struct taskinfo *)thash[bt].ch.prev)->ch.next = tip; thash[bt].ch.prev = tip; tip->id = id; tip->type = type; tip->btime = current->real_start_time.tv_sec + boottime.tv_sec; if (current->real_start_time.tv_nsec + boottime.tv_nsec > NSEC_PER_SEC) tip->btime++; strncpy(tip->command, current->comm, COMLEN); return tip; } /* ** garbage collector that removes: ** - exited tasks that are not by user mode programs ** - sockinfo's that are not used any more ** - taskinfo's that do not exist any more ** ** a mutex avoids that the garbage collector runs several times in parallel ** ** this function may 
only be called in process context! */ static void garbage_collector(void) { mutex_lock(&gclock); if (jiffies_64 < gclast + (HZ/2)) { // maximum 2 GC cycles per second mutex_unlock(&gclock); return; } gctaskexit(); // remove remaining taskinfo structs from exit list gcsockinfo(); // clean up sockinfo structs in shash list gctaskinfo(); // clean up taskinfo structs in thash list gclast = jiffies_64; mutex_unlock(&gclock); } /* ** tasks in the exitlist can be read by a user mode process for a limited ** amount of time; this function removes all taskinfo structures that have ** not been read within that period of time ** notice that exited processes are chained to the tail, so the oldest ** can be found at the head */ static void gctaskexit() { unsigned long flags; struct taskinfo *tip; spin_lock_irqsave(&exitlock, flags); for (tip=exithead; tip;) { if (jiffies_64 < tip->exittime + GCINTERVAL) break; // remove taskinfo from exitlist exithead = tip->ch.next; kmem_cache_free(ticache, tip); nre--; tip = exithead; } /* ** if list empty now, then exithead and exittail both NULL ** wakeup waiters for emptylist */ if (nre == 0) { exittail = NULL; wake_up_interruptible(&exitlist_empty); } spin_unlock_irqrestore(&exitlock, flags); } /* ** cleanup sockinfo structures that are connected to finished processes */ static void gcsockinfo() { int i; struct sockinfo *sip, *sipsave; unsigned long sflags, tflags; struct pid *pid; /* ** go through all sockinfo hash buckets */ for (i=0; i < SBUCKS; i++) { if (shash[i].ch.next == (void *)&shash[i].ch) continue; // quick return without lock spin_lock_irqsave(&shash[i].lock, sflags); sip = shash[i].ch.next; /* ** search all sockinfo structs chained in one bucket */ while (sip != (void *)&shash[i].ch) { /* ** TCP connections that were not in ** state ESTABLISHED or LISTEN can be ** eliminated */ if (sip->proto == IPPROTO_TCP) { switch (sip->last_state) { case TCP_ESTABLISHED: case TCP_LISTEN: break; default: sipsave = sip->ch.next; 
delete_sockinfo(sip); sip = sipsave; continue; } } /* ** check if this sockinfo has no relation ** for a while with a thread group ** if so, delete the sockinfo */ if (sip->tgp == NULL) { if (sip->lastact + GCMAXUNREF < jiffies_64) { register_unident(sip); sipsave = sip->ch.next; delete_sockinfo(sip); sip = sipsave; } else { sip = sip->ch.next; } continue; } /* ** check if referred thread group is ** already marked as 'indelete' during this ** sockinfo search ** if so, delete this sockinfo */ spin_lock_irqsave(&thash[sip->tgh].lock, tflags); if (sip->tgp->state == INDELETE) { spin_unlock_irqrestore(&thash[sip->tgh].lock, tflags); sipsave = sip->ch.next; delete_sockinfo(sip); sip = sipsave; continue; } /* ** check if referred thread group still exists; ** this step will be skipped if we already verified ** the existance of the thread group earlier during ** this garbage collection cycle */ if (sip->tgp->state != CHECKED) { /* ** connected thread group not yet verified ** during this cycle, so check if it still ** exists ** if not, mark the thread group as 'indelete' ** (it can not be deleted right now because ** we might find other sockinfo's referring ** to this thread group during the current ** cycle) and delete this sockinfo ** if the thread group exists, just mark ** it as 'checked' for this cycle */ rcu_read_lock(); pid = find_vpid(sip->tgp->id); rcu_read_unlock(); if (pid == NULL) { sip->tgp->state = INDELETE; spin_unlock_irqrestore( &thash[sip->tgh].lock, tflags); sipsave = sip->ch.next; delete_sockinfo(sip); sip = sipsave; continue; } else { sip->tgp->state = CHECKED; } } spin_unlock_irqrestore(&thash[sip->tgh].lock, tflags); /* ** check if this sockinfo has a relation with a thread ** if not, skip further handling of this sockinfo */ if (sip->thp == NULL) { sip = sip->ch.next; continue; } /* ** check if referred thread is already marked ** as 'indelete' during this sockinfo search ** if so, break connection */ spin_lock_irqsave(&thash[sip->thh].lock, 
tflags); if (sip->thp->state == INDELETE) { spin_unlock_irqrestore(&thash[sip->thh].lock, tflags); sip->thp = NULL; sip = sip->ch.next; continue; } /* ** check if referred thread is already checked ** during this sockinfo search */ if (sip->thp->state == CHECKED) { spin_unlock_irqrestore(&thash[sip->thh].lock, tflags); sip = sip->ch.next; continue; } /* ** connected thread not yet verified ** check if it still exists ** if not, mark it as 'indelete' and break connection ** if thread exists, mark it 'checked' */ rcu_read_lock(); pid = find_vpid(sip->thp->id); rcu_read_unlock(); if (pid == NULL) { sip->thp->state = INDELETE; sip->thp = NULL; } else { sip->thp->state = CHECKED; } spin_unlock_irqrestore(&thash[sip->thh].lock, tflags); /* ** check if a TCP port has not been used ** for some time --> destroy even if the thread ** (group) is still there */ if (sip->proto == IPPROTO_TCP && sip->lastact + GCMAXTCP < jiffies_64) { sipsave = sip->ch.next; delete_sockinfo(sip); sip = sipsave; continue; } /* ** check if a UDP port has not been used ** for some time --> destroy even if the thread ** (group) is still there ** e.g. 
outgoing DNS requests (to remote port 53) are ** issued every time with another source port being ** a new object that should not be kept too long; ** local well-known ports are useful to keep */ if (sip->proto == IPPROTO_UDP && sip->lastact + GCMAXUDP < jiffies_64 && sip->key.udp > 1024) { sipsave = sip->ch.next; delete_sockinfo(sip); sip = sipsave; continue; } sip = sip->ch.next; } spin_unlock_irqrestore(&shash[i].lock, sflags); } } /* ** remove taskinfo structures of finished tasks from hash list */ static void gctaskinfo() { int i; struct taskinfo *tip, *tipsave; unsigned long tflags; struct pid *pid; /* ** go through all taskinfo hash buckets */ for (i=0; i < TBUCKS; i++) { if (thash[i].ch.next == (void *)&thash[i].ch) continue; // quick return without lock spin_lock_irqsave(&thash[i].lock, tflags); tip = thash[i].ch.next; /* ** check all taskinfo structs chained to this bucket */ while (tip != (void *)&thash[i].ch) { switch (tip->state) { /* ** remove INDELETE tasks from the hash buckets ** -- move thread group to exitlist ** -- destroy thread right away */ case INDELETE: tipsave = tip->ch.next; if (tip->type == 'g') move_taskinfo(tip); // thread group else delete_taskinfo(tip); // thread tip = tipsave; break; case CHECKED: tip->state = 0; tip = tip->ch.next; break; default: // not checked yet rcu_read_lock(); pid = find_vpid(tip->id); rcu_read_unlock(); if (pid == NULL) { tipsave = tip->ch.next; if (tip->type == 'g') move_taskinfo(tip); else delete_taskinfo(tip); tip = tipsave; } else { tip = tip->ch.next; } } } spin_unlock_irqrestore(&thash[i].lock, tflags); } } /* ** remove all sockinfo structs */ static void wipesockinfo() { struct sockinfo *sip, *sipsave; int i; unsigned long sflags; for (i=0; i < SBUCKS; i++) { spin_lock_irqsave(&shash[i].lock, sflags); sip = shash[i].ch.next; /* ** free all structs chained in one bucket */ while (sip != (void *)&shash[i].ch) { sipsave = sip->ch.next; delete_sockinfo(sip); sip = sipsave; } 
spin_unlock_irqrestore(&shash[i].lock, sflags); } } /* ** remove all taskinfo structs from hash list */ static void wipetaskinfo() { struct taskinfo *tip, *tipsave; int i; unsigned long tflags; for (i=0; i < TBUCKS; i++) { spin_lock_irqsave(&thash[i].lock, tflags); tip = thash[i].ch.next; /* ** free all structs chained in one bucket */ while (tip != (void *)&thash[i].ch) { tipsave = tip->ch.next; delete_taskinfo(tip); tip = tipsave; } spin_unlock_irqrestore(&thash[i].lock, tflags); } } /* ** remove all taskinfo structs from exit list */ static void wipetaskexit() { gctaskexit(); } /* ** move one taskinfo struct from hash bucket to exitlist */ static void move_taskinfo(struct taskinfo *tip) { unsigned long flags; /* ** remove from hash list */ ((struct taskinfo *)tip->ch.next)->ch.prev = tip->ch.prev; ((struct taskinfo *)tip->ch.prev)->ch.next = tip->ch.next; spin_lock_irqsave(&nrtlock, flags); nrt--; spin_unlock_irqrestore(&nrtlock, flags); /* ** add to exitlist */ tip->ch.next = NULL; tip->state = FINISHED; tip->exittime = jiffies_64; spin_lock_irqsave(&exitlock, flags); if (exittail) { // list filled? 
exittail->ch.next = tip; exittail = tip; } else { // list empty exithead = exittail = tip; } nre++; wake_up_interruptible(&exitlist_filled); spin_unlock_irqrestore(&exitlock, flags); } /* ** remove one taskinfo struct for the hash bucket chain */ static void delete_taskinfo(struct taskinfo *tip) { unsigned long flags; ((struct taskinfo *)tip->ch.next)->ch.prev = tip->ch.prev; ((struct taskinfo *)tip->ch.prev)->ch.next = tip->ch.next; kmem_cache_free(ticache, tip); spin_lock_irqsave(&nrtlock, flags); nrt--; spin_unlock_irqrestore(&nrtlock, flags); } /* ** remove one sockinfo struct for the hash bucket chain */ static void delete_sockinfo(struct sockinfo *sip) { unsigned long flags; ((struct sockinfo *)sip->ch.next)->ch.prev = sip->ch.prev; ((struct sockinfo *)sip->ch.prev)->ch.next = sip->ch.next; kmem_cache_free(sicache, sip); spin_lock_irqsave(&nrslock, flags); nrs--; spin_unlock_irqrestore(&nrslock, flags); } /* ** read function for /proc/netatop */ static int netatop_show(struct seq_file *m, void *v) { seq_printf(m, "tcpsndpacks: %12lu (unident: %9lu)\n" "tcprcvpacks: %12lu (unident: %9lu)\n" "udpsndpacks: %12lu (unident: %9lu)\n" "udprcvpacks: %12lu (unident: %9lu)\n\n" "icmpsndpacks: %12lu\n" "icmprcvpacks: %12lu\n\n" "#sockinfo: %12lu (overflow: %8lu)\n" "#taskinfo: %12lu (overflow: %8lu)\n" "#taskexit: %12lu\n\n" "modversion: %14s\n", tcpsndpacks, unidenttcpsndpacks, tcprcvpacks, unidenttcprcvpacks, udpsndpacks, unidentudpsndpacks, udprcvpacks, unidentudprcvpacks, icmpsndpacks, icmprcvpacks, nrs, nrs_ovf, nrt, nrt_ovf, nre, NETATOPVERSION); return 0; } static int netatop_open(struct inode *inode, struct file *file) { return single_open(file, netatop_show, NULL); } /* ** called when user spce issues system call getsockopt() */ static int getsockopt(struct sock *sk, int cmd, void __user *user, int *len) { int bt; struct taskinfo *tip; char tasktype = 't'; struct netpertask npt; unsigned long tflags; /* ** verify the proper privileges */ if 
(!capable(CAP_NET_ADMIN)) return -EPERM; /* ** react on command */ switch (cmd) { case NETATOP_PROBE: break; case NETATOP_FORCE_GC: garbage_collector(); break; case NETATOP_EMPTY_EXIT: while (nre > 0) { if (wait_event_interruptible(exitlist_empty, nre == 0)) return -ERESTARTSYS; } break; case NETATOP_GETCNT_EXIT: if (nre == 0) wake_up_interruptible(&exitlist_empty); if (*len < sizeof(pid_t)) return -EINVAL; if (*len > sizeof npt) *len = sizeof npt; spin_lock_irqsave(&exitlock, tflags); /* ** check if an exited process is present ** if not, wait for it... */ while (nre == 0) { spin_unlock_irqrestore(&exitlock, tflags); if ( wait_event_interruptible(exitlist_filled, nre > 0)) return -ERESTARTSYS; spin_lock_irqsave(&exitlock, tflags); } /* ** get first eprocess from exitlist and remove it from there */ tip = exithead; if ( (exithead = tip->ch.next) == NULL) exittail = NULL; nre--; spin_unlock_irqrestore(&exitlock, tflags); /* ** pass relevant info to user mode ** and free taskinfo struct */ npt.id = tip->id; npt.tc = tip->tc; npt.btime = tip->btime; memcpy(npt.command, tip->command, COMLEN); if (copy_to_user(user, &npt, *len) != 0) return -EFAULT; kmem_cache_free(ticache, tip); return 0; case NETATOP_GETCNT_TGID: tasktype = 'g'; case NETATOP_GETCNT_PID: if (*len < sizeof(pid_t)) return -EINVAL; if (*len > sizeof npt) *len = sizeof npt; if (copy_from_user(&npt, user, *len) != 0) return -EFAULT; /* ** search requested id in taskinfo hash */ bt = THASH(npt.id, tasktype); // calculate hash if (thash[bt].ch.next == (void *)&thash[bt].ch) return -ESRCH; // quick return without lock spin_lock_irqsave(&thash[bt].lock, tflags); tip = thash[bt].ch.next; while (tip != (void *)&thash[bt].ch) { // is this the one? 
if (tip->id == npt.id && tip->type == tasktype) { /* ** found: copy results to user space */ memcpy(npt.command, tip->command, COMLEN); npt.tc = tip->tc; npt.btime = tip->btime; spin_unlock_irqrestore(&thash[bt].lock, tflags); if (copy_to_user(user, &npt, *len) != 0) return -EFAULT; else return 0; } tip = tip->ch.next; } spin_unlock_irqrestore(&thash[bt].lock, tflags); return -ESRCH; default: printk(KERN_INFO "unknown getsockopt command %d\n", cmd); return -EINVAL; } return 0; } /* ** kernel mode thread: initiate garbage collection every N seconds */ static int netatop_thread(void *dummy) { while (!kthread_should_stop()) { /* ** do garbage collection */ garbage_collector(); /* ** wait a while */ (void) schedule_timeout_interruptible(GCINTERVAL); } return 0; } /* ** called when module loaded */ int init_module() { int i; /* ** initialize caches for taskinfo and sockinfo */ ticache = kmem_cache_create("Netatop_taskinfo", sizeof (struct taskinfo), 0, 0, NULL); if (!ticache) return -EFAULT; sicache = kmem_cache_create("Netatop_sockinfo", sizeof (struct sockinfo), 0, 0, NULL); if (!sicache) { kmem_cache_destroy(ticache); return -EFAULT; } /* ** initialize hash table for taskinfo and sockinfo */ for (i=0; i < TBUCKS; i++) { thash[i].ch.next = &thash[i].ch; thash[i].ch.prev = &thash[i].ch; spin_lock_init(&thash[i].lock); } for (i=0; i < SBUCKS; i++) { shash[i].ch.next = &shash[i].ch; shash[i].ch.prev = &shash[i].ch; spin_lock_init(&shash[i].lock); } getboottime(&boottime); /* ** register getsockopt for user space communication */ if (nf_register_sockopt(&sockopts) < 0) { kmem_cache_destroy(ticache); kmem_cache_destroy(sicache); return -1; } /* ** create a new kernel mode thread for time-driven garbage collection ** after creation, the thread waits until it is woken up */ knetatop_task = kthread_create(netatop_thread, NULL, "knetatop"); if (IS_ERR(knetatop_task)) { nf_unregister_sockopt(&sockopts); kmem_cache_destroy(ticache); kmem_cache_destroy(sicache); return -1; } /* 
** prepare hooks and register */ hookin_ipv4.hooknum = NF_IP_LOCAL_IN; // input packs hookin_ipv4.hook = ipv4_hookin; // func to call hookin_ipv4.pf = PF_INET; // IPV4 packets hookin_ipv4.priority = NF_IP_PRI_FIRST; // highest prio hookout_ipv4.hooknum = NF_IP_LOCAL_OUT; // output packs hookout_ipv4.hook = ipv4_hookout; // func to call hookout_ipv4.pf = PF_INET; // IPV4 packets hookout_ipv4.priority = NF_IP_PRI_FIRST; // highest prio nf_register_hook(&hookin_ipv4); // register hook nf_register_hook(&hookout_ipv4); // register hook /* ** create a /proc-entry to produce status-info on request */ proc_create("netatop", 0444, NULL, &netatop_proc_fops); /* ** all admi prepared; kick off kernel mode thread */ wake_up_process(knetatop_task); return 0; // return success } /* ** called when module unloaded */ void cleanup_module() { /* ** tell kernel daemon to stop */ kthread_stop(knetatop_task); /* ** unregister netfilter hooks and other miscellaneous stuff */ nf_unregister_hook(&hookin_ipv4); nf_unregister_hook(&hookout_ipv4); remove_proc_entry("netatop", NULL); nf_unregister_sockopt(&sockopts); /* ** destroy allocated stats */ wipesockinfo(); wipetaskinfo(); wipetaskexit(); /* ** destroy caches */ kmem_cache_destroy(ticache); kmem_cache_destroy(sicache); } netatop-0.3/netatop.h000064400000000000000000000022541221053613400146470ustar00rootroot00000000000000#define COMLEN 16 struct taskcount { unsigned long long tcpsndpacks; unsigned long long tcpsndbytes; unsigned long long tcprcvpacks; unsigned long long tcprcvbytes; unsigned long long udpsndpacks; unsigned long long udpsndbytes; unsigned long long udprcvpacks; unsigned long long udprcvbytes; /* space for future extensions */ }; struct netpertask { pid_t id; // tgid or tid (depending on command) unsigned long btime; char command[COMLEN]; struct taskcount tc; }; /* ** getsocktop commands */ #define NETATOP_BASE_CTL 15661 // just probe if the netatop module is active #define NETATOP_PROBE (NETATOP_BASE_CTL) // force 
garbage collection to make finished processes available #define NETATOP_FORCE_GC (NETATOP_BASE_CTL+1) // wait until all finished processes are read (blocks until done) #define NETATOP_EMPTY_EXIT (NETATOP_BASE_CTL+2) // get info for finished process (blocks until available) #define NETATOP_GETCNT_EXIT (NETATOP_BASE_CTL+3) // get counters for thread group (i.e. process): input is 'id' (pid) #define NETATOP_GETCNT_TGID (NETATOP_BASE_CTL+4) // get counters for thread: input is 'id' (tid) #define NETATOP_GETCNT_PID (NETATOP_BASE_CTL+5) netatop-0.3/netatop.init000075500000000000000000000017231221053613400153660ustar00rootroot00000000000000#!/bin/sh # # netatop Startup script for the netatop kernel module and daemon # # chkconfig: 2345 84 16 # description: Gather per-process statistics about network utilization # ### BEGIN INIT INFO # Provides: netatop # Required-Start: $local_fs # Required-Stop: $local_fs # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Gather per-process statistics about network utilization # Description: Gather per-process statistics about network utilization ### END INIT INFO RETVAL=0 start() { modprobe netatop if [ -f /usr/sbin/netatopd ]; then /usr/sbin/netatopd fi } stop() { PID=$(ps -e | grep netatopd | sed -e 's/^ *//' -e 's/ .*//') if [ "$PID" ] then kill "$PID" fi modprobe -r netatop } # See how we were called. 
case "$1" in start) start ;; stop) stop ;; status) cat /proc/netatop ;; reload) stop start ;; restart) stop start ;; *) echo "Usage: $0 [start|stop|status|reload|restart]" exit 1 esac netatop-0.3/netatopd.h000064400000000000000000000006531221053613400150140ustar00rootroot00000000000000#define SEMAKEY 1541961 #define NETEXITFILE "/var/run/netatop.log" #define MYMAGIC (unsigned int) 0xfeedb0b0 struct naheader { u_int32_t magic; // magic number MYMAGIC u_int32_t curseq; // sequence number of last netpertask u_int16_t hdrlen; // length of this header u_int16_t ntplen; // length of netpertask structure pid_t mypid; // PID of netatopd itself }; netatop-0.3/netatopversion.h000064400000000000000000000001071221053613400162500ustar00rootroot00000000000000#define NETATOPVERSION "0.3" #define NETATOPDATE "2013/07/19 11:33:59"