ev->pagefault.write? «W»: «R»);
break;
case SWAP_PF:
seq_printf (m, «swapfile page @%p (%s)\n», ev->pagefault.addr,
ev->pagefault.write? «W»: «R»);
break;
case FILE_PF:
seq_printf (m, «shared file page @%p (%s)\n», ev->pagefault.addr,
ev->pagefault.write? «W»: «R»);
break;
case SYSCALLRET:
seq_printf (m, «%s ->%ld (%p)\n», ev->callret.callname, ev->callret.ret,
(void *) ev->callret.ret);
break;
default:
printk («memmon: Unexpected event% d\n», ev->type);
return 1;
}
return 0;
}
/*** Exported entries ***/
/*
* Initializes event ringbuffer & creates /proc entry
*/
int init_events(void)
{
struct proc_dir_entry *entry;
buflen = max (buflen, MIN_EVENTS_BUFLEN);
events = kzalloc (buflen, GFP_KERNEL);
if (! events)
{
printk («memmon: Event ringbuffer too big!\n»);
return 0;
}
ev_start = ev_end = 0;
entry = create_proc_entry (EVENTS_ENTRY, 0444, procdir);
if (entry)
entry->proc_fops = &events_fops;
else
{
kfree(events);
return 0;
}
return 1;
}
/*
 * Tears down the event facility: the /proc entry is removed first,
 * then the ring buffer memory is released.
 */
void fini_events(void)
{
	remove_proc_entry(EVENTS_ENTRY, procdir);
	kfree(events);
}
/*
 * Appends a copy of *ev at the tail of the ring buffer and wakes up
 * anyone sleeping on ev_waitq.  The buffer indices are updated under
 * ev_lock.
 *
 * NOTE(review): ev_end only wraps once it exceeds NEVENTS, so slot
 * events[NEVENTS] is written too -- confirm the buffer really holds
 * NEVENTS + 1 entries.
 */
void put_event(const struct memmon_event *ev)
{
	spin_lock(&ev_lock);

	events[ev_end] = *ev;
	ev_end += 1;
	if (ev_end > NEVENTS) {
		/* Ran past the last slot: wrap around and remember it */
		ev_end = 0;
		ev_start = 0;
		ev_ovf = 1;
	}

	/*
	 * Once the buffer has wrapped at least once, the head advances
	 * together with the tail; before that it stays at zero.
	 */
	if (ev_ovf) {
		ev_start += 1;
		if (ev_start > NEVENTS)
			ev_start = 0;
	}

	spin_unlock(&ev_lock);
	wake_up_interruptible_sync(&ev_waitq);
}
watch-pids.h
/*
* Selection of PIDs to watch for.
*/
#ifndef MEMMON_WATCH_PIDS_H
#define MEMMON_WATCH_PIDS_H
/*
* Checks whether PID @pid is present in the watched PID set.
* Returns 1 if present, 0 otherwise (including PIDs outside the
* range the set can track).
*/
int pid_present (pid_t pid);
/*
* Initializes the PID set (empty) and creates its /proc entry.
* Returns 1 on success, 0 if the /proc entry could not be created.
*/
int init_watch_pids(void);
/*
* Removes the /proc entry of the PID set.
*/
void fini_watch_pids(void);
#endif // MEMMON_WATCH_PIDS_H
watch-pids.c
/*
* Selection of PIDs to watch for.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/bitmap.h>
#include <asm/uaccess.h>
#include <asm/bitops.h>
#include «common.h»
#include «watch-pids.h»
/*** Forward declarations ***/
/* The handlers are defined after the file_operations table that names them */
static int watch_pids_open(struct inode *i, struct file *filp);
static int watch_pids_release(struct inode *i, struct file *filp);
static ssize_t watch_pids_read(struct file *filp, char __user *buf,
			       size_t count, loff_t *off);
static ssize_t watch_pids_write(struct file *filp, const char __user *buf,
				size_t count, loff_t *offp);
/*** Internal data ***/
/* Filename in procfs directory */
#define WATCHPID_ENTRY «watch-pids»
#define PID_COUNT PID_MAX_DEFAULT + 1
/* PIDs are stored in one single bitmap for 8192 entries
* This is VERY RARELY unacceptable */
static DECLARE_BITMAP (watched_pids, PID_COUNT);
/*** File operations ***/
static const struct file_operations watch_pids_fops =
{
owner = THIS_MODULE,
open = watch_pids_open,
read = watch_pids_read,
write = watch_pids_write,
release = watch_pids_release
};
/*** Entry points ***/
/*
* open() handler
*/
static int watch_pids_open (struct inode *i, struct file *filp)
{
try_module_get (THIS_MODULE);
/*
* If file opened for read, print PID set to internal buffer
*/
if (filp->f_mode & FMODE_READ)
{
const int FDATA_SIZ = 32*1024;
char *fdata;
int len;
/*
* Disallow mixed RW-access
*/
if (filp->f_mode & FMODE_WRITE)
return – EINVAL;
fdata = kzalloc (FDATA_SIZ, GFP_KERNEL);
len = bitmap_scnlistprintf (fdata, FDATA_SIZ – 1,
watched_pids, PID_COUNT);
/* Append \n */
if (len)
{
fdata [len++] = '\n';
fdata[len] = 0;
}
filp->private_data = fdata;
}
return 0;
}
/*
* close() handler
*/
static int watch_pids_release (struct inode *i, struct file *filp)
{
module_put (THIS_MODULE);
if (filp->private_data)
kfree (filp->private_data);
return 0;
}
/*
 * read() handler - simply returns a chunk of data from the buffer that
 * was allocated and formatted by the open() handler.
 *
 * Copies at most @count bytes starting at offset *@offp to @buf and
 * advances *@offp by the number of bytes copied.
 *
 * Returns the number of bytes copied, 0 at or past the end of the
 * buffer, -EINVAL for a negative offset, -EFAULT if the user buffer is
 * not writable.
 */
static ssize_t watch_pids_read(struct file *filp, char __user *buf,
			       size_t count, loff_t *offp)
{
	char *fdata = filp->private_data;
	size_t len = strlen(fdata);

	/* Reject negative offsets before mixing signed and unsigned values */
	if (*offp < 0)
		return -EINVAL;
	if (*offp >= len)
		return 0;

	len = min(count, len - (size_t)*offp);
	if (copy_to_user(buf, fdata + *offp, len))
		return -EFAULT;

	*offp += len;
	return len;
}
/*
 * write() handler.
 *
 * The buffer must hold the ASCII representation of a single integer:
 *   if positive, its value is the PID to add to the set;
 *   if negative, its absolute value is the PID to remove from the set;
 *   if zero, the PID set is cleared.
 *
 * At most one page (4096 bytes) of the user buffer is examined.
 *
 * Returns the number of bytes consumed on success, -ENOMEM if the
 * temporary buffer could not be allocated, -EFAULT if the user buffer
 * is not readable, -EIO if the text is not an integer whose absolute
 * value is a valid bit index (below PID_COUNT).
 */
static ssize_t watch_pids_write(struct file *filp, const char __user *buf,
				size_t count, loff_t *offp)
{
	const size_t maxlen = 4096;
	size_t len;
	pid_t new_pid;
	char *data;
	ssize_t res;

	/* copy up to one page to our buffer */
	len = min(maxlen, count);
	/* +1: kzalloc() zero-fills, so the extra byte NUL-terminates the text */
	data = kzalloc(len + 1, GFP_KERNEL);
	if (unlikely(!data))
		return -ENOMEM;

	if (copy_from_user(data, buf, len)) {
		res = -EFAULT;
	} else if (sscanf(data, "%d", &new_pid) == 1 &&
		   new_pid < (PID_COUNT) && new_pid > -(PID_COUNT)) {
		/* Valid bit indices are 0 .. PID_COUNT - 1 */
		if (new_pid > 0)
			set_bit(new_pid, watched_pids);
		else if (new_pid < 0)
			clear_bit(-new_pid, watched_pids);
		else
			bitmap_zero(watched_pids, PID_COUNT);
		res = len;
	} else {
		/* buffer doesn't represent a number in PID range */
		res = -EIO;
	}

	kfree(data);
	return res;
}
/*** Exported entries ***/
/*
 * Checks whether PID @pid is present in the watched PID set.
 *
 * Returns 1 if present, 0 otherwise.  Non-positive PIDs and PIDs that
 * do not fit into the bitmap are reported as absent.
 */
int pid_present(pid_t pid)
{
	/* Valid bit indices are 0 .. PID_COUNT - 1 */
	if (pid <= 0 || pid >= (PID_COUNT))
		return 0;
	return test_bit(pid, watched_pids) ? 1 : 0;
}
/*
 * Initializes the PID set and creates its /proc entry.
 *
 * Returns 1 on success, 0 if the /proc entry could not be created.
 */
int init_watch_pids(void)
{
	struct proc_dir_entry *entry;

	/*
	 * Start from an empty set BEFORE the file becomes visible, so a
	 * write arriving right after creation cannot be wiped out.
	 */
	bitmap_zero(watched_pids, PID_COUNT);

	entry = create_proc_entry(WATCHPID_ENTRY, 0666, procdir);
	if (!entry)
		return 0;

	entry->proc_fops = &watch_pids_fops;
	return 1;
}
/*
 * Shuts the PID selection facility down by removing its /proc entry.
 */
void fini_watch_pids(void)
{
	remove_proc_entry(WATCHPID_ENTRY, procdir);
}
syscalls.h
/*
* Syscall capture facility.
*/
#ifndef MEMMON_SYSCALLS_H
#define MEMMON_SYSCALLS_H
/*
* Installs the capture handlers: every syscall listed in the handler
* table is redirected to the common entry stub, and the original
* table entry is saved.  Always returns 1.
*/
int capture_syscalls(void);
/*
* Uninstalls the handlers by putting the saved original entries back
* into the kernel syscall table.
*/
void restore_syscalls(void);
#endif //MEMMON_SYSCALLS_H
syscalls.c
/*
* Syscall capture facility.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include «common.h»
#include «syscalls.h»
#include «events.h»
#include «watch-pids.h»
/*** Syscalls ***/
/*
* They just put an appropriate event into ringbuffer
*/
/*
 * Pre-call hook for mmap2: if the current process is in the watched
 * PID set, records an MMAP2 event carrying the call's arguments.
 */
asmlinkage void sys2_mmap2(void __user *start, size_t length,
			   unsigned long prot, unsigned long flags,
			   unsigned long fd, unsigned long pgoff)
{
	struct memmon_event ev = { .type = MMAP2, .pid = current->pid };

	if (!pid_present(ev.pid))
		return;

	ev.mmap2.start = start;
	ev.mmap2.len = length;
	/* NOTE(review): prot is stored shifted right by 3 bits - confirm this is intended */
	ev.mmap2.prot = prot >> 3;
	ev.mmap2.flags = flags;
	ev.mmap2.fd = fd;
	ev.mmap2.off = pgoff;
	put_event(&ev);
}
asmlinkage void sys2_mmap2_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «mmap»;
ev.callret.ret = ret;
put_event(&ev);
}
/*
 * Pre-call hook for munmap: if the current process is watched, records
 * a MUNMAP event with the start address and length.
 */
asmlinkage void sys2_munmap(void __user *start, size_t length)
{
	struct memmon_event ev = { .type = MUNMAP, .pid = current->pid };

	if (!pid_present(ev.pid))
		return;

	ev.munmap.start = start;
	ev.munmap.len = length;
	put_event(&ev);
}
asmlinkage void sys2_munmap_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «munmap»;
ev.callret.ret = ret;
put_event(&ev);
}
/*
 * Pre-call hook for mremap: if the current process is watched, records
 * an MREMAP event.  Index 0 of start[]/len[] holds the first
 * address/length pair (@addr1, @length1), index 1 the second one
 * (@addr2, @length2).
 */
asmlinkage void sys2_mremap(void __user *addr1, size_t length1,
			    unsigned long length2, unsigned long flags,
			    void __user *addr2)
{
	struct memmon_event ev = { .type = MREMAP, .pid = current->pid };

	if (!pid_present(ev.pid))
		return;

	ev.mremap.start[0] = addr1;
	ev.mremap.start[1] = addr2;
	ev.mremap.len[0] = length1;
	ev.mremap.len[1] = length2;
	ev.mremap.flags = flags;
	put_event(&ev);
}
asmlinkage void sys2_mremap_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «mremap»;
ev.callret.ret = ret;
put_event(&ev);
}
/*
 * Pre-call hook for mlock: if the current process is watched, records
 * an MLOCK event with the start address and length.
 */
asmlinkage void sys2_mlock(void __user *start, size_t length)
{
	struct memmon_event ev = { .type = MLOCK, .pid = current->pid };

	if (!pid_present(ev.pid))
		return;

	ev.mlock.start = start;
	ev.mlock.len = length;
	put_event(&ev);
}
asmlinkage void sys2_mlock_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «mlock»;
ev.callret.ret = ret;
put_event(&ev);
}
/*
 * Pre-call hook for munlock: if the current process is watched, records
 * a MUNLOCK event with the start address and length.
 */
asmlinkage void sys2_munlock(void __user *start, size_t length)
{
	struct memmon_event ev = { .type = MUNLOCK, .pid = current->pid };

	if (!pid_present(ev.pid))
		return;

	ev.munlock.start = start;
	ev.munlock.len = length;
	put_event(&ev);
}
asmlinkage void sys2_munlock_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «munlock»;
ev.callret.ret = ret;
put_event(&ev);
}
asmlinkage void sys2_mlockall (unsigned long flags)
{
struct memmon_event ev = {.type = MLOCKALL.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.mlockall.flags = flags;
put_event(&ev);
}
asmlinkage void sys2_mlockall_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «mlockall»;
ev.callret.ret = ret;
put_event(&ev);
}
asmlinkage void sys2_munlockall()
{
struct memmon_event ev = {.type = MUNLOCKALL.pid = current->pid};
if (! pid_present (ev.pid)) return;
put_event(&ev);
}
asmlinkage void sys2_munlockall_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «munlockall»;
ev.callret.ret = ret;
put_event(&ev);
}
/*
 * Pre-call hook for brk: if the current process is watched, records a
 * BRK event with the requested address.
 */
asmlinkage void sys2_brk(void __user *start)
{
	struct memmon_event ev = { .type = BRK, .pid = current->pid };

	if (!pid_present(ev.pid))
		return;

	ev.brk.addr = start;
	put_event(&ev);
}
asmlinkage void sys2_brk_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «brk»;
ev.callret.ret = ret;
put_event(&ev);
}
asmlinkage void sys2_fsync (int fd)
{
struct memmon_event ev = {.type = FSYNC.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.fsync.fd = fd;
put_event(&ev);
}
asmlinkage void sys2_fsync_exit (long ret)
{
struct memmon_event ev = {.type = SYSCALLRET.pid = current->pid};
if (! pid_present (ev.pid)) return;
ev.callret.callname = «fsync»;
ev.callret.ret = ret;
put_event(&ev);
}
/*** Handler tables ***/
/* The kernel's own syscall table (defined outside this module) */
extern void *sys_call_table[];
/* Original sys_call_table entries, saved per syscall number */
void *old_sys_call[NR_syscalls];
/* Pre-call handlers, indexed by syscall number */
void *sys_call_trap[NR_syscalls];
/* Post-call handlers, indexed by syscall number */
void *sys_call_exit[NR_syscalls];
/*
 * Describes one captured syscall: its number and the pair of hooks
 * that run before and after the original implementation.
 */
struct syscall_handler {
	int nr;		/* syscall number */
	void *hand1;	/* pre-call handler */
	void *hand2;	/* post-call handler */
};
#define SYSCALL_HANDLER(name) {__NR_##name, sys2_##name, sys2_##name##_exit}
#define SYSCALL_HANDLERS_END() {0, 0, 0}
/*
* Main handler table
* Each SYSCALL_HANDLER(name) entry installs handlers
* «sys2_name/sys2_name_exit for sys_name call.
*/
struct syscall_handler syscalls[] =
{
SYSCALL_HANDLER(mmap2),
SYSCALL_HANDLER(munmap),
SYSCALL_HANDLER(mremap),
SYSCALL_HANDLER(mlock),
SYSCALL_HANDLER(munlock),
SYSCALL_HANDLER(mlockall),
SYSCALL_HANDLER(munlockall),
SYSCALL_HANDLER(brk),
SYSCALL_HANDLER(fsync),
SYSCALL_HANDLERS_END()
};
/*
 * Common entry stub that capture_syscalls() stores into sys_call_table
 * for every captured syscall.  Implemented in assembly
 * (syscalls-entry.S).
 */
void syscalls_entry(void);
/*** Exported entries ***/
/*
* Installs handlers.
*/
int capture_syscalls(void)
{
int i;
for (i = 0; syscalls[i].hand1; ++i)
{
int nr = syscalls[i].nr;
sys_call_trap[nr] = syscalls[i].hand1;
sys_call_exit[nr] = syscalls[i].hand2;
old_sys_call[nr] = sys_call_table[nr];
sys_call_table[nr] = syscalls_entry;
}
return 1;
}
/*
 * Uninstalls the capture handlers: every syscall listed in the handler
 * table gets its saved original entry written back into the kernel
 * syscall table.
 */
void restore_syscalls(void)
{
	int idx = 0;

	while (syscalls[idx].hand1) {
		int nr = syscalls[idx].nr;

		sys_call_table[nr] = old_sys_call[nr];
		idx++;
	}
}
syscalls-entry.S
/*
* Syscall entry/exit capture
*/
#include «offsets.h»
/*
 * Common stub installed in place of every captured syscall.
 * Flow: call the pre-call handler, run the original syscall with its
 * return address redirected to sysreturn, call the post-call handler,
 * then jump to the saved return address.
 *
 * Two scratch slots addressed relative to %ebp are used:
 *   TI_stk0     - the syscall number (%eax at entry)
 *   TI_stk0 + 4 - the original return address
 * TI_stk0 is emitted by gen-offsets.c as the offset of
 * supervisor_stack inside struct thread_info, so this code assumes
 * %ebp points at the current thread_info - TODO confirm against the
 * kernel's syscall entry path.
 */
/* Entry handler table (pre-call hooks, indexed by syscall number) */
/* NOTE(review): the directive below looks like it lost its leading dot (.extern) - confirm */
extern sys_call_trap
/* Exit handler table (post-call hooks, indexed by syscall number) */
/* NOTE(review): same as above - presumably .extern */
extern sys_call_exit
/* Global entry for our syscalls */
syscalls_entry:
/*
 * Save registers in order syscall handlers expect 'em.
 * Seven 4-byte pushes: afterwards 28(%esp) is the slot that was on top
 * of the stack at entry.
 */
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
/* Save eax (the table index) in the first scratch slot */
movl %eax, TI_stk0 (%ebp)
/* Call our pre-call handler, selected by %eax (4-byte table entries) */
call *sys_call_trap (,%eax, 4)
/*
 * Fake return address: stash the original one in the second scratch
 * slot and overwrite it on the stack with sysreturn, so the original
 * syscall returns into our exit path.
 */
movl 28 (%esp),%eax
movl %eax, TI_stk0 + 4 (%ebp)
movl $sysreturn, 28 (%esp)
/* Restore context (reverse order of the pushes; %eax gets its entry value back) */
popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
popl %eax
/* Jump to default system handler (saved original table entry) */
jmpl *old_sys_call (,%eax, 4)
sysreturn:
/* Save registers */
pushal
/* Pass new %eax (the syscall's return value) to exit handler as its stack argument */
pushl %eax
/* Restore original %eax (the syscall number saved at entry) to index the table */
movl TI_stk0 (%ebp),%eax
/* Call our exit handler */
call *sys_call_exit (,%eax, 4)
/* Restore context: drop the argument slot, then reload all registers saved by pushal */
popl %eax
popal
/* Jump back to syscall dispatcher entry (the return address saved above) */
jmpl *TI_stk0 + 4 (%ebp)
/* NOTE(review): presumably .globl - the leading dot appears to be missing; confirm */
globl syscalls_entry
gen-offsets.c
//
#define __KERNEL__
/* bugoga */
#include <linux/kernel.h>
#include <linux/autoconf.h>
#include <linux/thread_info.h>
#include <stdio.h>
int main()
{
printf («#define TI_stk0% d\n», offsetof (struct thread_info, supervisor_stack));
return 0;
}
mm-fault.h
/*
* Pagefault interception.