diff -urNp linux-2.4.7.SuSE-orig/Documentation/Configure.help linux-2.4.7.SuSE/Documentation/Configure.help --- linux-2.4.7.SuSE-orig/Documentation/Configure.help Tue Oct 30 16:43:42 2001 +++ linux-2.4.7.SuSE/Documentation/Configure.help Mon Jun 24 11:52:47 2002 @@ -18812,6 +18812,44 @@ CONFIG_IA64_PALINFO To use this option, you have to check that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. +Kernel events tracing support +CONFIG_TRACE + It is possible for the kernel to log important events to a tracing + driver. Doing so enables the use of the generated traces in order + to reconstruct the dynamic behavior of the kernel, and hence the + whole system. + + The tracing process contains 4 parts: + 1) The logging of events by key parts of the kernel. + 2) The trace driver that keeps the events in a data buffer. + 3) A trace daemon that opens the trace driver and is notified + every time there is a certain quantity of data to read + from the trace driver (using SIGIO). + 4) A trace event data decoder that reads the accumulated data + and formats it in a human-readable format. + + If you say Y or M here, the first part of the tracing process will + always take place. That is, critical parts of the kernel will call + upon the kernel tracing function. The data generated doesn't go + any further until a trace driver registers itself as such with the + kernel. Therefore, if you answer Y, then the driver will be part of + the kernel and the events will always proceed onto the driver and + if you say M, then the events will only proceed onto the driver when + its module is loaded. Note that events aren't logged in the driver + until the profiling daemon opens the device, configures it and + issues the "start" command through ioctl(). + + The impact of a fully functional system (kernel event logging + + driver event copying + active trace daemon) is 2.5% for core events.
+ This means that for a task that took 100 seconds on a normal system, it + will take 102.5 seconds on a traced system. This is very low compared + to other profiling or tracing methods. + + For more information on kernel tracing, the trace daemon or the event + decoder, please check the following address : + http://www.opersys.com/LTT + + # # A couple of things I keep forgetting: # capitalize: AppleTalk, Ethernet, DOS, DMA, FAT, FTP, Internet, diff -urNp linux-2.4.7.SuSE-orig/Makefile linux-2.4.7.SuSE/Makefile --- linux-2.4.7.SuSE-orig/Makefile Tue Oct 30 16:43:44 2001 +++ linux-2.4.7.SuSE/Makefile Mon Jun 24 11:52:47 2002 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 7 -EXTRAVERSION = +EXTRAVERSION =-SuSE # SUBARCH tells the usermode build what the underlying arch is. That is set # first, and if a usermode build is happening, the "ARCH=um" on the command @@ -215,6 +215,7 @@ DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o +DRIVERS-$(CONFIG_TRACE) += drivers/trace/trace_driver.o DRIVERS := $(DRIVERS-y) diff -urNp linux-2.4.7.SuSE-orig/arch/s390/config.in linux-2.4.7.SuSE/arch/s390/config.in --- linux-2.4.7.SuSE-orig/arch/s390/config.in Tue Oct 30 16:43:37 2001 +++ linux-2.4.7.SuSE/arch/s390/config.in Mon Jun 24 11:52:47 2002 @@ -62,6 +62,11 @@ fi source fs/Config.in mainmenu_option next_comment +comment 'Kernel tracing' +tristate 'Kernel events tracing support' CONFIG_TRACE +endmenu + +mainmenu_option next_comment comment 'Kernel hacking' #bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC diff -urNp linux-2.4.7.SuSE-orig/arch/s390/kernel/entry.S linux-2.4.7.SuSE/arch/s390/kernel/entry.S --- linux-2.4.7.SuSE-orig/arch/s390/kernel/entry.S Tue Oct 30 16:43:31 2001 +++ linux-2.4.7.SuSE/arch/s390/kernel/entry.S Mon Jun 24 11:52:47 2002 @@ -7,6 +7,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut 
Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation */ #define ASSEMBLY @@ -218,6 +219,14 @@ pgm_system_call: slr %r8,%r8 # gpr 8 is call save (-> tracesys) ic %r8,0x8B # get svc number from lowcore stosm 24(%r15),0x03 # reenable interrupts +/* call to ltt trace done here. R8 has the syscall (svc) number to trace */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) /* tjh - ltt port */ + /* add call to trace_real_syscall_entry */ + la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter + l %r1,BASED(.Ltracesysent) + basr %r14,%r1 + lm %r0,%r6,SP_R0(%r15) /* restore call clobbered regs tjh */ +#endif sll %r8,2 l %r8,sys_call_table-entry_base(8,%r13) # get address of system call tm tsk_ptrace+3(%r9),0x02 # PT_TRACESYS @@ -227,6 +236,13 @@ pgm_system_call: # ATTENTION: check sys_execve_glue before # changing anything here !! +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) /* tjh - ltt port */ + /* add call to trace_real_syscall_exit */ + la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter + l %r1,BASED(.Ltracesysext) + basr %r14,%r1 + lm %r0,%r6,SP_R0(%r15) /* restore call clobbered regs */ +#endif sysc_return: tm SP_PSW+1(%r15),0x01 # returning to user ?
bno BASED(sysc_leave) # no-> skip resched & signal @@ -895,6 +911,10 @@ restart_go: .Lsigaltstack: .long sys_sigaltstack .Ltrace: .long syscall_trace .Lvfork: .long sys_vfork +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) /* the trace hooks only exist under this same condition in traps.c */ +.Ltracesysent: .long trace_real_syscall_entry +.Ltracesysext: .long trace_real_syscall_exit +#endif #ifdef CONFIG_SMP .Lschedtail: .long schedule_tail diff -urNp linux-2.4.7.SuSE-orig/arch/s390/kernel/process.c linux-2.4.7.SuSE/arch/s390/kernel/process.c --- linux-2.4.7.SuSE-orig/arch/s390/kernel/process.c Tue Oct 30 16:43:31 2001 +++ linux-2.4.7.SuSE/arch/s390/kernel/process.c Mon Jun 24 11:52:48 2002 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -265,6 +266,10 @@ int kernel_thread(int (*fn)(void *), voi : "d" (clone_arg), "i" (__NR_clone), "i" (__NR_exit), "d" (arg), "d" (fn), "i" (__LC_KERNEL_STACK) , "i" (-STACK_FRAME_OVERHEAD) : "2", "3", "4" ); +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) + if (retval > 0) + TRACE_PROCESS(TRACE_EV_PROCESS_KTHREAD, retval, (int) fn); +#endif return retval; } diff -urNp linux-2.4.7.SuSE-orig/arch/s390/kernel/sys_s390.c linux-2.4.7.SuSE/arch/s390/kernel/sys_s390.c --- linux-2.4.7.SuSE-orig/arch/s390/kernel/sys_s390.c Mon Mar 19 15:35:11 2001 +++ linux-2.4.7.SuSE/arch/s390/kernel/sys_s390.c Mon Jun 24 11:52:48 2002 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,8 @@ asmlinkage int sys_ipc (uint call, int f { struct ipc_kludge tmp; int ret; + + TRACE_IPC(TRACE_EV_IPC_CALL, call, first); switch (call) { case SEMOP: diff -urNp linux-2.4.7.SuSE-orig/arch/s390/kernel/traps.c linux-2.4.7.SuSE/arch/s390/kernel/traps.c --- linux-2.4.7.SuSE-orig/arch/s390/kernel/traps.c Tue Oct 30 16:43:31 2001 +++ linux-2.4.7.SuSE/arch/s390/kernel/traps.c Mon Jun 24 11:52:48 2002 @@ -5,6 +5,7 @@ * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Portions added by T.
Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds @@ -26,6 +27,7 @@ #include #include #include +#include #include #include @@ -92,12 +94,18 @@ asmlinkage void name(struct pt_regs * re static void inline do_trap(long interruption_code, int signr, char *str, struct pt_regs *regs, siginfo_t *info) { + uint64_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; + /* * We got all needed information from the lowcore and can * now safely switch on interrupts. */ if (regs->psw.mask & PSW_PROBLEM_STATE) __sti(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); /* code is stored starting at byte offset 4 of the zeroed 64-bit trace value */ + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); if (regs->psw.mask & PSW_PROBLEM_STATE) { struct task_struct *tsk = current; @@ -127,6 +135,7 @@ static void inline do_trap(long interrup else die(str, regs, interruption_code); } + TRACE_TRAP_EXIT(); } int do_debugger_trap(struct pt_regs *regs,int signal) @@ -165,6 +174,8 @@ asmlinkage void illegal_op(struct pt_reg { __u8 opcode[6]; __u16 *location; + uint64_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; int signal = 0; location = (__u16 *)(regs->psw.addr-S390_lowcore.pgm_ilc); @@ -175,6 +186,9 @@ asmlinkage void illegal_op(struct pt_reg */ if (regs->psw.mask & PSW_PROBLEM_STATE) __sti(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); if (regs->psw.mask & PSW_PROBLEM_STATE) get_user(*((__u16 *) opcode), location); @@ -217,6 +231,7 @@ asmlinkage void illegal_op(struct pt_reg } else if (signal) do_trap(interruption_code, signal, "illegal operation", regs, NULL); + TRACE_TRAP_EXIT(); } @@ -227,6 +242,8 @@ specification_exception(struct pt_regs * { __u8 opcode[6];
__u16 *location = NULL; + uint64_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; int signal = 0; location = (__u16 *)(regs->psw.addr-S390_lowcore.pgm_ilc); @@ -237,6 +254,9 @@ specification_exception(struct pt_regs * */ if (regs->psw.mask & PSW_PROBLEM_STATE) __sti(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); if (regs->psw.mask & PSW_PROBLEM_STATE) { get_user(*((__u16 *) opcode), location); @@ -276,6 +296,7 @@ specification_exception(struct pt_regs * } else if (signal) do_trap(interruption_code, signal, "specification exception", regs, NULL); + TRACE_TRAP_EXIT(); } #else DO_ERROR(SIGILL, "specification exception", specification_exception) @@ -285,6 +306,8 @@ asmlinkage void data_exception(struct pt { __u8 opcode[6]; __u16 *location; + uint64_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; int signal = 0; location = (__u16 *)(regs->psw.addr-S390_lowcore.pgm_ilc); @@ -295,6 +318,9 @@ asmlinkage void data_exception(struct pt */ if (regs->psw.mask & PSW_PROBLEM_STATE) __sti(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&interruption_code,sizeof(interruption_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code, (regs->psw.addr & PSW_ADDR_MASK)); if (MACHINE_HAS_IEEE) __asm__ volatile ("stfpc %0\n\t" @@ -365,6 +391,7 @@ asmlinkage void data_exception(struct pt } else if (signal) do_trap(interruption_code, signal, "data exception", regs, NULL); + TRACE_TRAP_EXIT(); } @@ -417,6 +444,11 @@ void __init trap_init(void) void handle_per_exception(struct pt_regs *regs) { + uint64_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+6,&S390_lowcore.pgm_code,2); /* copy the 2-byte interrupt code into the last two bytes of the zeroed value */ + 
TRACE_TRAP_ENTRY(ltt_interruption_code,(regs->psw.addr & PSW_ADDR_MASK)); if(regs->psw.mask&PSW_PROBLEM_STATE) { per_struct *per_info=&current->thread.per_info; @@ -433,5 +465,94 @@ void handle_per_exception(struct pt_regs /* Hopefully switching off per tracing will help us survive */ regs->psw.mask &= ~PSW_PER_MASK; } + TRACE_TRAP_EXIT(); } +/* ltt - Trace related code */ +#if (CONFIG_TRACE || CONFIG_TRACE_MODULE) +/* Log a TRACE_EV_SYSCALL_ENTRY event: the syscall id is read from r8 and the address from the PSW, or from a user stack frame when the trace configuration requests a depth or bounds search. */ +asmlinkage void trace_real_syscall_entry(struct pt_regs * regs) +{ + int use_depth; + int use_bounds; + int depth = 0; + int seek_depth; + unsigned long lower_bound; + unsigned long upper_bound; + unsigned long addr; + unsigned long* stack; + unsigned long temp_stack; + trace_syscall_entry trace_syscall_event; + /* Set the syscall ID */ + /* Register 8 is setup just prior to the call */ + /* This instruction is just following linkage */ + /* so it's ok. If moved and chance of R8 being */ + /* clobbered, would need to dig it out of the stack */ + __asm__ volatile( + " stc 8,%0\n\t" + : "=m" (trace_syscall_event.syscall_id)); + /* get the psw address */ + trace_syscall_event.address = regs->psw.addr; + /* and off the hi-order bit */ + trace_syscall_event.address &= PSW_ADDR_MASK; + if(!(user_mode(regs))) /* if kernel mode, return */ + goto trace_syscall_end; + /* Get the trace configuration - if none, return */ + if(trace_get_config(&use_depth, + &use_bounds, + &seek_depth, + (void*)&lower_bound, + (void*)&upper_bound) < 0) + goto trace_syscall_end; + /* Do we have to search for an instruction pointer address range */ + if((use_depth == 1) || (use_bounds == 1)) + { + /* Start at the top of the stack */ + /* stack pointer is register 15 */ + stack = (unsigned long*) regs->gprs[15]; /* stack pointer */ + /* Keep on going until we reach the end of the process' stack limit */ + do + { + if (get_user(addr,stack+14)) break; /* get the program address +0x38; stop walking if the frame is unreadable */ + /* and off the hi-order bit */ + addr &= PSW_ADDR_MASK; + /* Does this LOOK LIKE an address in the program */ + if 
((addr > current->mm->start_code) + &&(addr < current->mm->end_code)) + { + /* Does this address fit the description */ + if(((use_depth == 1) && (depth == seek_depth)) + ||((use_bounds == 1) && (addr > lower_bound) + && (addr < upper_bound))) + { + /* Set the address */ + trace_syscall_event.address = addr; + /* We're done */ + goto trace_syscall_end; + } + else + /* We're one depth more */ + depth++; + } + /* Go on to the next address */ + if (get_user(temp_stack,stack)) break; /* get contents of stack; stop walking if it is unreadable */ + temp_stack &= PSW_ADDR_MASK; /* and off hi order bit */ + if (temp_stack <= ((unsigned long) stack & PSW_ADDR_MASK)) break; /* the back chain is user-controlled: require it to move to a higher address so a cyclic chain cannot loop forever */ + stack = (unsigned long *)temp_stack; /* move into stack */ + /* stack may or may not go to zero when end hit */ + /* using 0x7fffffff-_STK_LIM to validate that the address is */ + /* within the range of a valid stack address */ + /* If outside that range, exit the loop, stack end must have */ + /* been hit. */ + } while (stack >= (unsigned long *)(0x7fffffff-_STK_LIM)); + } +trace_syscall_end: + /* Trace the event */ + trace_event(TRACE_EV_SYSCALL_ENTRY, &trace_syscall_event); +} +/* Log a TRACE_EV_SYSCALL_EXIT event; no event data is passed. */ +asmlinkage void trace_real_syscall_exit(void) +{ + trace_event(TRACE_EV_SYSCALL_EXIT, NULL); +} +#endif /* (CONFIG_TRACE || CONFIG_TRACE_MODULE) */ diff -urNp linux-2.4.7.SuSE-orig/arch/s390/mm/fault.c linux-2.4.7.SuSE/arch/s390/mm/fault.c --- linux-2.4.7.SuSE-orig/arch/s390/mm/fault.c Tue Oct 30 16:43:37 2001 +++ linux-2.4.7.SuSE/arch/s390/mm/fault.c Mon Jun 24 11:52:48 2002 @@ -5,6 +5,7 @@ * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Hartmut Penner (hp@de.ibm.com) * Ulrich Weigand (uweigand@de.ibm.com) + * Portions added by T.
Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation * * Derived from "arch/i386/mm/fault.c" * Copyright (C) 1995 Linus Torvalds @@ -25,6 +26,8 @@ #include #include +#include + #include #include #include @@ -57,6 +60,8 @@ asmlinkage void do_page_fault(struct pt_ int write; int si_code = SEGV_MAPERR; int kernel_address = 0; + uint64_t ltt_interruption_code; + char * ic_ptr = (char *) &ltt_interruption_code; tsk = current; mm = tsk->mm; @@ -142,6 +147,9 @@ asmlinkage void do_page_fault(struct pt_ __sti(); + memset(&ltt_interruption_code,0,sizeof(ltt_interruption_code)); + memcpy(ic_ptr+4,&error_code,sizeof(error_code)); + TRACE_TRAP_ENTRY(ltt_interruption_code,(regs->psw.addr & PSW_ADDR_MASK)); down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -194,6 +202,7 @@ good_area: } up_read(&mm->mmap_sem); + TRACE_TRAP_EXIT(); return; /* @@ -223,6 +232,7 @@ bad_area: #endif force_sigsegv(tsk, si_code, (void *)address); + TRACE_TRAP_EXIT(); return; } @@ -230,6 +240,7 @@ no_context: /* Are we prepared to handle this kernel fault? */ if ((fixup = search_exception_table(regs->psw.addr)) != 0) { regs->psw.addr = fixup; + TRACE_TRAP_EXIT(); /* NOTE(review): no_context is also reached by gotos that run before TRACE_TRAP_ENTRY, so this exit can be unpaired - confirm */ return; } @@ -274,6 +285,8 @@ do_sigbus: /* Kernel mode?
Handle exceptions or die */ if (!(regs->psw.mask & PSW_PROBLEM_STATE)) goto no_context; + + TRACE_TRAP_EXIT(); } /* diff -urNp linux-2.4.7.SuSE-orig/arch/s390/mm/fault.c.orig linux-2.4.7.SuSE/arch/s390/mm/fault.c.orig --- linux-2.4.7.SuSE-orig/arch/s390/mm/fault.c.orig Wed Dec 31 19:00:00 1969 +++ linux-2.4.7.SuSE/arch/s390/mm/fault.c.orig Tue Oct 30 16:43:37 2001 @@ -0,0 +1,536 @@ +/* + * arch/s390/mm/fault.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Ulrich Weigand (uweigand@de.ibm.com) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef CONFIG_SYSCTL +extern int sysctl_userprocess_debug; +#endif + +extern void die(const char *,struct pt_regs *,long); +static void force_sigsegv(struct task_struct *tsk, int code, void *address); + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * error_code: + * ****0004 Protection -> Write-Protection (suprression) + * ****0010 Segment translation -> Not present (nullification) + * ****0011 Page translation -> Not present (nullification) + */ +asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long address; + unsigned long fixup; + int write; + int si_code = SEGV_MAPERR; + int kernel_address = 0; + + tsk = current; + mm = tsk->mm; + + /* + * Check for low-address protection. This needs to be treated + * as a special case because the translation exception code + * field is not guaranteed to contain valid data in this case. 
+ */ + if ((error_code & 0xff) == 4 && !(S390_lowcore.trans_exc_code & 4)) { + + /* Low-address protection hit in kernel mode means + NULL pointer write access in kernel mode. */ + if (!(regs->psw.mask & PSW_PROBLEM_STATE)) { + address = 0; + kernel_address = 1; + goto no_context; + } + + /* Low-address protection hit in user mode 'cannot happen'. */ + die ("Low-address protection", regs, error_code); + do_exit(SIGKILL); + } + + /* + * get the failing address + * more specific the segment and page table portion of + * the address + */ + + address = S390_lowcore.trans_exc_code&0x7ffff000; + + + /* + * Check which address space the address belongs to + */ + switch (S390_lowcore.trans_exc_code & 3) + { + case 0: /* Primary Segment Table Descriptor */ + kernel_address = 1; + goto no_context; + + case 1: /* STD determined via access register */ + if (S390_lowcore.exc_access_id == 0) + { + kernel_address = 1; + goto no_context; + } + if (regs && S390_lowcore.exc_access_id < NUM_ACRS) + { + if (regs->acrs[S390_lowcore.exc_access_id] == 0) + { + kernel_address = 1; + goto no_context; + } + if (regs->acrs[S390_lowcore.exc_access_id] == 1) + { + /* user space address */ + break; + } + } + die("page fault via unknown access register", regs, error_code); + do_exit(SIGKILL); + break; + + case 2: /* Secondary Segment Table Descriptor */ + case 3: /* Home Segment Table Descriptor */ + /* user space address */ + break; + } + + /* + * Check whether we have a user MM in the first place. 
+ */ + if (in_interrupt() || !mm || !(regs->psw.mask & _PSW_IO_MASK_BIT)) + goto no_context; + + /* + * When we get here, the fault happened in the current + * task's user address space, so we can switch on the + * interrupts again and then search the VMAs + */ + + __sti(); + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address, NULL)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + write = 0; + si_code = SEGV_ACCERR; + + switch (error_code & 0xFF) { + case 0x04: /* write, present*/ + write = 1; + break; + case 0x10: /* not present*/ + case 0x11: /* not present*/ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; + break; + default: + printk("code should be 4, 10 or 11 (%lX) \n",error_code&0xFF); + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, write)) { + case 1: + tsk->min_flt++; + break; + case 2: + tsk->maj_flt++; + break; + case 0: + goto do_sigbus; + default: + goto out_of_memory; + } + + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. 
+ */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (regs->psw.mask & PSW_PROBLEM_STATE) { + tsk->thread.prot_addr = address; + tsk->thread.trap_no = error_code; +#ifndef CONFIG_SYSCTL +#ifdef CONFIG_PROCESS_DEBUG + printk("User process fault: interruption code 0x%lX\n",error_code); + printk("failing address: %lX\n",address); + show_regs(regs); +#endif +#else + if (sysctl_userprocess_debug) { + printk("User process fault: interruption code 0x%lX\n", + error_code); + printk("failing address: %lX\n", address); + show_regs(regs); + } +#endif + + force_sigsegv(tsk, si_code, (void *)address); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if ((fixup = search_exception_table(regs->psw.addr)) != 0) { + regs->psw.addr = fixup; + return; + } + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + if (kernel_address) + printk(KERN_ALERT "Unable to handle kernel pointer dereference" + " at virtual kernel address %08lx\n", address); + else + printk(KERN_ALERT "Unable to handle kernel paging request" + " at virtual user address %08lx\n", address); + + die("Oops", regs, error_code); + do_exit(SIGKILL); + + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. +*/ +out_of_memory: + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", tsk->comm); + if (regs->psw.mask & PSW_PROBLEM_STATE) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.prot_addr = address; + tsk->thread.trap_no = error_code; + force_sig(SIGBUS, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!(regs->psw.mask & PSW_PROBLEM_STATE)) + goto no_context; +} + +/* + * Send SIGSEGV to task. 
This is an external routine + * to keep the stack usage of do_page_fault small. + */ +static void force_sigsegv(struct task_struct *tsk, int code, void *address) +{ + struct siginfo si; + si.si_signo = SIGSEGV; + si.si_code = code; + si.si_addr = address; + force_sig_info(SIGSEGV, &si, tsk); +} + +typedef struct _pseudo_wait_t { + struct _pseudo_wait_t *next; + wait_queue_head_t queue; + unsigned long address; + int resolved; +} pseudo_wait_t; + +static pseudo_wait_t *pseudo_lock_queue = NULL; +static spinlock_t pseudo_wait_spinlock; /* spinlock to protect lock queue */ + +/* + * This routine handles 'pagex' pseudo page faults. + */ +asmlinkage void +do_pseudo_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + pseudo_wait_t wait_struct; + pseudo_wait_t *ptr, *last, *next; + unsigned long address; + int kernel_address; + + /* + * get the failing address + * more specific the segment and page table portion of + * the address + */ + address = S390_lowcore.trans_exc_code & 0xfffff000; + + if (address & 0x80000000) { + /* high bit set -> a page has been swapped in by VM */ + address &= 0x7fffffff; + spin_lock(&pseudo_wait_spinlock); + last = NULL; + ptr = pseudo_lock_queue; + while (ptr != NULL) { + next = ptr->next; + if (address == ptr->address) { + /* + * This is one of the processes waiting + * for the page. Unchain from the queue. + * There can be more than one process + * waiting for the same page. VM presents + * an initial and a completion interrupt for + * every process that tries to access a + * page swapped out by VM. 
+ */ + if (last == NULL) + pseudo_lock_queue = next; + else + last->next = next; + /* now wake up the process */ + ptr->resolved = 1; + wake_up(&ptr->queue); + } else + last = ptr; + ptr = next; + } + spin_unlock(&pseudo_wait_spinlock); + } else { + /* Pseudo page faults in kernel mode is a bad idea */ + if (!(regs->psw.mask & PSW_PROBLEM_STATE)) { + /* + * VM presents pseudo page faults if the interrupted + * state was not disabled for interrupts. So we can + * get pseudo page fault interrupts while running + * in kernel mode. We simply access the page here + * while we are running disabled. VM will then swap + * in the page synchronously. + */ + kernel_address = 0; + switch (S390_lowcore.trans_exc_code & 3) { + case 0: /* Primary Segment Table Descriptor */ + kernel_address = 1; + break; + case 1: /* STD determined via access register */ + if (S390_lowcore.exc_access_id == 0 || + regs->acrs[S390_lowcore.exc_access_id]==0) + kernel_address = 1; + break; + case 2: /* Secondary Segment Table Descriptor */ + case 3: /* Home Segment Table Descriptor */ + break; + } + if (kernel_address) + /* dereference a virtual kernel address */ + __asm__ __volatile__ ( + " ic 0,0(%0)" + : : "a" (address) : "0"); + else + /* dereference a virtual user address */ + __asm__ __volatile__ ( + " la 2,0(%0)\n" + " sacf 512\n" + " ic 2,0(2)\n" + "0:sacf 0\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,0b\n" + ".previous" + : : "a" (address) : "2" ); + + return; + } + /* initialize and add element to pseudo_lock_queue */ + init_waitqueue_head (&wait_struct.queue); + wait_struct.address = address; + wait_struct.resolved = 0; + spin_lock(&pseudo_wait_spinlock); + wait_struct.next = pseudo_lock_queue; + pseudo_lock_queue = &wait_struct; + spin_unlock(&pseudo_wait_spinlock); + /* go to sleep */ + wait_event(wait_struct.queue, wait_struct.resolved); + } +} + +#ifdef CONFIG_PFAULT +/* + * 'pfault' pseudo page faults routines. 
+ */ +static int pfault_disable = 0; + +static int __init nopfault(char *str) +{ + pfault_disable = 1; + return 1; +} + +__setup("nopfault", nopfault); + +typedef struct { + __u16 refdiagc; + __u16 reffcode; + __u16 refdwlen; + __u16 refversn; + __u64 refgaddr; + __u64 refselmk; + __u64 refcmpmk; + __u64 reserved; +} __attribute__ ((packed)) pfault_refbk_t; + +int pfault_init(void) +{ + pfault_refbk_t refbk = + { 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48, 0ULL }; + int rc; + + if (pfault_disable) + return -1; + __asm__ __volatile__( + " diag %1,%0,0x258\n" + "0: j 2f\n" + "1: la %0,8\n" + "2:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,1b\n" + ".previous" + : "=d" (rc) : "a" (&refbk) : "cc" ); + __ctl_set_bit(0, 9); + return rc; +} + +void pfault_fini(void) +{ + pfault_refbk_t refbk = + { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + + if (pfault_disable) + return; + __ctl_clear_bit(0,9); + __asm__ __volatile__( + " diag %0,0,0x258\n" + "0:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,0b\n" + ".previous" + : : "a" (&refbk) : "cc" ); +} + +asmlinkage void +pfault_interrupt(struct pt_regs *regs, __u16 error_code) +{ + struct task_struct *tsk; + wait_queue_head_t queue; + wait_queue_head_t *qp; + __u16 subcode; + + /* + * Get the external interruption subcode & pfault + * initial/completion signal bit. VM stores this + * in the 'cpu address' field associated with the + * external interrupt. + */ + subcode = S390_lowcore.cpu_addr; + if ((subcode & 0xff00) != 0x0600) + return; + + /* + * Get the token (= address of kernel stack of affected task). + */ + tsk = (struct task_struct *) + (*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE); + + /* + * We got all needed information from the lowcore and can + * now safely switch on interrupts. 
+ */ + if (regs->psw.mask & PSW_PROBLEM_STATE) + __sti(); + + if (subcode & 0x0080) { + /* signal bit is set -> a page has been swapped in by VM */ + qp = (wait_queue_head_t *) + xchg(&tsk->thread.pfault_wait, -1); + if (qp != NULL) { + /* Initial interrupt was faster than the completion + * interrupt. pfault_wait is valid. Set pfault_wait + * back to zero and wake up the process. This can + * safely be done because the task is still sleeping + * and can't procude new pfaults. */ + tsk->thread.pfault_wait = 0ULL; + wake_up(qp); + } + } else { + /* signal bit not set -> a real page is missing. */ + init_waitqueue_head (&queue); + qp = (wait_queue_head_t *) + xchg(&tsk->thread.pfault_wait, (addr_t) &queue); + if (qp != NULL) { + /* Completion interrupt was faster than the initial + * interrupt (swapped in a -1 for pfault_wait). Set + * pfault_wait back to zero and exit. This can be + * done safely because tsk is running in kernel + * mode and can't produce new pfaults. */ + tsk->thread.pfault_wait = 0ULL; + } + + /* go to sleep */ + wait_event(queue, tsk->thread.pfault_wait == 0ULL); + } +} +#endif + diff -urNp linux-2.4.7.SuSE-orig/drivers/Makefile linux-2.4.7.SuSE/drivers/Makefile --- linux-2.4.7.SuSE-orig/drivers/Makefile Sun Jul 15 19:15:44 2001 +++ linux-2.4.7.SuSE/drivers/Makefile Mon Jun 24 11:52:48 2002 @@ -39,6 +39,7 @@ subdir-$(CONFIG_PNP) += pnp subdir-$(CONFIG_ISDN) += isdn subdir-$(CONFIG_ATM) += atm subdir-$(CONFIG_FC4) += fc4 +subdir-$(CONFIG_TRACE) += trace # CONFIG_HAMRADIO can be set without CONFIG_NETDEVICE being set -- ch subdir-$(CONFIG_HAMRADIO) += net/hamradio diff -urNp linux-2.4.7.SuSE-orig/drivers/s390/s390io.c linux-2.4.7.SuSE/drivers/s390/s390io.c --- linux-2.4.7.SuSE-orig/drivers/s390/s390io.c Tue Oct 30 16:43:32 2001 +++ linux-2.4.7.SuSE/drivers/s390/s390io.c Mon Jun 24 11:52:48 2002 @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS #include #endif @@ -2490,9 +2491,11 @@ asmlinkage void do_IRQ( struct pt_regs r 
} irq_enter(cpu, irq); + TRACE_IRQ_ENTRY(irq, !(((regs).psw.mask&PSW_PROBLEM_STATE) != 0)); s390irq_spin_lock( irq ); s390_process_IRQ( irq ); s390irq_spin_unlock( irq ); + TRACE_IRQ_EXIT(); irq_exit(cpu, irq); } diff -urNp linux-2.4.7.SuSE-orig/drivers/s390/s390mach.c linux-2.4.7.SuSE/drivers/s390/s390mach.c --- linux-2.4.7.SuSE-orig/drivers/s390/s390mach.c Tue Oct 30 16:43:32 2001 +++ linux-2.4.7.SuSE/drivers/s390/s390mach.c Mon Jun 24 11:52:48 2002 @@ -6,12 +6,14 @@ * S390 version * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Ingo Adlung (adlung@de.ibm.com) + * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation */ #include #include #include #include +#include #ifdef CONFIG_SMP #include #endif @@ -133,6 +135,8 @@ void s390_do_machine_check( void ) { int crw_count; mcic_t mcic; + uint64_t ltt_interruption_code; + uint32_t ltt_old_psw; #ifdef S390_MACHCHK_DEBUG printk( "s390_do_machine_check : starting ...\n"); @@ -141,6 +145,14 @@ void s390_do_machine_check( void ) memcpy( &mcic, &S390_lowcore.mcck_interruption_code, sizeof(__u64)); + memcpy( &ltt_interruption_code, + &S390_lowcore.mcck_interruption_code, + sizeof(__u64)); + memcpy( &ltt_old_psw, + &S390_lowcore.mcck_old_psw, + sizeof(uint32_t)); /* NOTE(review): copies the first 4 bytes of mcck_old_psw; if those hold the PSW mask rather than the address, read the address field instead - confirm */ + ltt_old_psw &= PSW_ADDR_MASK; + TRACE_TRAP_ENTRY(ltt_interruption_code,ltt_old_psw); /* NOTE(review): this patch adds no matching TRACE_TRAP_EXIT for s390_do_machine_check */ if ( mcic.mcc.mcd.cp ) // CRW pending ? { diff -urNp linux-2.4.7.SuSE-orig/drivers/trace/Makefile linux-2.4.7.SuSE/drivers/trace/Makefile --- linux-2.4.7.SuSE-orig/drivers/trace/Makefile Wed Dec 31 19:00:00 1969 +++ linux-2.4.7.SuSE/drivers/trace/Makefile Mon Jun 24 11:52:48 2002 @@ -0,0 +1,17 @@ +# +# Makefile for the kernel tracing drivers. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now inherited from the +# parent makes..
+# + +O_TARGET := trace_driver.o + +# Is it loaded as a module or as part of the kernel +obj-$(CONFIG_TRACE) = tracer.o + +include $(TOPDIR)/Rules.make diff -urNp linux-2.4.7.SuSE-orig/drivers/trace/tracer.c linux-2.4.7.SuSE/drivers/trace/tracer.c --- linux-2.4.7.SuSE-orig/drivers/trace/tracer.c Wed Dec 31 19:00:00 1969 +++ linux-2.4.7.SuSE/drivers/trace/tracer.c Mon Jun 24 11:52:48 2002 @@ -0,0 +1,1403 @@ +/***************************************************************** + * File : tracer.c + * Description : + * Contains the code for the kernel tracing driver (tracer + * for short). + * Author : + * Karim Yaghmour + * Date : + * 03/12/01, Added user event support. + * 05/01/01, Modified PPC bit manipulation functions for + * x86 compatibility. (andy_lowe@mvista.com) + * 15/11/00, Finally fixed memory allocation and remapping + * method. Now using BTTV-driver-inspired code. + * 13/03/00, Modified tracer so that the daemon mmaps the + * tracer's buffers in its address space rather + * than use "read". + * 26/01/00, Added support for standardized buffer sizes and + * extensibility of events. + * 01/10/99, Modified tracer in order to use double-buffering. + * 28/09/99, Adding tracer configuration support. + * 09/09/99, Changing the format of an event record in order to + * reduce the size of the traces. + * 04/03/99, Initial typing. + * Note : + * The sizes of the variables used to store the details of an + * event are planned for a system that gets at least one clock + * tick every 10 milliseconds. There has to be at least one + * event every 2^32-1 microseconds, otherwise the size of the + * variable holding the time doesn't work anymore.
+ *****************************************************************/
+
+/* Module and initialization stuff */
+/* NOTE(review): every angle-bracket #include directive in this section has
+   lost its header name (the <...> targets are missing from this copy of the
+   patch).  They must be restored from the upstream LTT patch before this
+   file can be compiled. */
+#include
+#include
+
+/* Necessary includes */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/* Local definitions */
+#include "tracer.h"
+
+/* Module information */
+MODULE_AUTHOR ("Karim Yaghmour (karym@opersys.com)");
+MODULE_DESCRIPTION("Linux Trace Toolkit (LTT) kernel tracing driver");
+// MODULE_LICENSE ("GPL");
+
+/* Local variables */
+/* Driver */
+static int sMajorNumber; /* Major number of the tracer */
+static int sOpenCount; /* Number of times device is open */
+/* Locking */
+static int sTracLock; /* Tracer lock used to lock primary buffer */
+static spinlock_t sSpinLock; /* Spinlock in order to lock kernel */
+/* Daemon */
+static int sSignalSent; /* A signal has been sent to the daemon */
+static struct task_struct* sDaemonTaskStruct; /* Task structure of the tracer daemon */
+/* Tracer configuration */
+static int sTracerStarted; /* Is the tracer started */
+static trace_event_mask sTracedEvents; /* Bit-field of events being traced */
+static trace_event_mask sLogEventDetailsMask; /* Log the details of the events mask */
+static int sLogCPUID; /* Log the CPUID associated with each event */
+static int sUseSyscallEIPBounds; /* Use address bounds to fetch the EIP where call is made */
+static int sLowerEIPBoundSet; /* The lower bound EIP has been set */
+static int sUpperEIPBoundSet; /* The upper bound EIP has been set */
+static void* sLowerEIPBound; /* The lower bound EIP */
+static void* sUpperEIPBound; /* The upper bound EIP */
+static int sTracingPID; /* Tracing only the events for one pid */
+static int sTracingPGRP; /* Tracing only the events for one process group */
+static int sTracingGID; /* Tracing only the events for one gid */
+static int sTracingUID; /* Tracing only the events for one uid */
+static pid_t sTracedPID; /* PID being traced */
+static pid_t sTracedPGRP; /* Process group being traced */
+static gid_t sTracedGID; /* GID being traced */
+static uid_t sTracedUID; /* UID being traced */
+static int sSyscallEIPDepthSet; /* The call depth at which to fetch EIP has been set */
+static int sSyscallEIPDepth; /* The call depth at which to fetch the EIP */
+/* Event data buffers */
+static int sBufReadComplete; /* Number of buffers completely filled */
+static int sSizeReadIncomplete; /* Quantity of data read from incomplete buffers */
+static int sEventsLost; /* Number of events lost because of lack of buffer space */
+static uint32_t sBufSize; /* Buffer sizes */
+static uint32_t sAllocSize; /* Size of buffers allocated */
+static uint32_t sBufferID; /* Unique buffer ID */
+static char* sTracBuf = NULL; /* Trace buffer */
+static char* sWritBuf = NULL; /* Buffer used for writing */
+static char* sReadBuf = NULL; /* Buffer used for reading */
+static char* sWritBufEnd; /* End of write buffer */
+static char* sReadBufEnd; /* End of read buffer */
+static char* sWritPos; /* Current position for writing */
+static char* sReadLimit; /* Limit at which read should stop */
+static char* sWritLimit; /* Limit at which write should stop */
+/* Time */
+static struct timeval sBufferStartTime; /* The time at which the buffer was started */
+/* Large data components allocated at load time */
+static char* sUserEventData = NULL; /* The data associated with a user event */
+
+
+/* The size of the structures used to describe the events.
+   The table is indexed by event ID (trace() reads sEventStructSize[pmEventID]);
+   an entry of 0 means the event carries no fixed-size detail structure. */
+static int sEventStructSize[TRACE_EV_MAX + 1] =
+{
+  sizeof(trace_start) /* TRACE_START */,
+  sizeof(trace_syscall_entry) /* TRACE_SYSCALL_ENTRY */,
+  0 /* TRACE_SYSCALL_EXIT */,
+  sizeof(trace_trap_entry) /* TRACE_TRAP_ENTRY */,
+  0 /* TRACE_TRAP_EXIT */,
+  sizeof(trace_irq_entry) /* TRACE_IRQ_ENTRY */,
+  0 /* TRACE_IRQ_EXIT */,
+  sizeof(trace_schedchange) /* TRACE_SCHEDCHANGE */,
+  0 /* TRACE_KERNEL_TIMER */,
+  sizeof(trace_soft_irq) /* TRACE_SOFT_IRQ */,
+
sizeof(trace_process) /* TRACE_PROCESS */, + sizeof(trace_file_system) /* TRACE_FILE_SYSTEM */, + sizeof(trace_timer) /* TRACE_TIMER */, + sizeof(trace_memory) /* TRACE_MEMORY */, + sizeof(trace_socket) /* TRACE_SOCKET */, + sizeof(trace_ipc) /* TRACE_IPC */, + sizeof(trace_network) /* TRACE_NETWORK */, + sizeof(trace_buffer_start) /* TRACE_BUFFER_START */, + 0 /* TRACE_BUFFER_END */, + sizeof(trace_new_event) /* TRACE_NEW_EVENT */, + sizeof(trace_custom) /* TRACE_CUSTOM */, + sizeof(trace_change_mask) /* TRACE_CHANGE_MASK */ +}; + +/* The file operations available for the tracer */ +static struct file_operations sTracerFileOps = +{ + owner: THIS_MODULE, + ioctl: tracer_ioctl, + mmap: tracer_mmap, + open: tracer_open, + release: tracer_release, + fsync: tracer_fsync, +}; + +/************************************************************************************************************/ +/************************************** Code inspired from BTTV driver **************************************/ +/************************************************************************************************************/ +#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE /* This inspired by rtai/shmem */ + +/* Given PGD from the address space's page table, return the kernel + * virtual mapping of the physical memory mapped at ADR. + */ +static inline unsigned long uvirt_to_kva(pgd_t *pgd, unsigned long adr) +{ + unsigned long ret = 0UL; + pmd_t *pmd; + pte_t *ptep, pte; + + if (!pgd_none(*pgd)) { + pmd = pmd_offset(pgd, adr); + if (!pmd_none(*pmd)) { + ptep = pte_offset(pmd, adr); + pte = *ptep; + if(pte_present(pte)) { + ret = (unsigned long) page_address(pte_page(pte)); + ret |= (adr & (PAGE_SIZE - 1)); + } + } + } + return ret; +} + +/* Here we want the physical address of the memory. + * This is used when initializing the contents of the + * area and marking the pages as reserved. 
+ */ +static inline unsigned long kvirt_to_pa(unsigned long adr) +{ + unsigned long va, kva, ret; + + va = VMALLOC_VMADDR(adr); + kva = uvirt_to_kva(pgd_offset_k(va), va); + ret = __pa(kva); + return ret; +} + +static void * rvmalloc(signed long size) +{ + void * mem; + unsigned long adr, page; + + mem=vmalloc_32(size); + if (mem) + { + memset(mem, 0, size); /* Clear the ram out, no junk to the user */ + adr=(unsigned long) mem; + while (size > 0) + { + page = kvirt_to_pa(adr); + mem_map_reserve(virt_to_page(__va(page))); + adr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + } + return mem; +} + +static void rvfree(void * mem, signed long size) +{ + unsigned long adr, page; + + if (mem) + { + adr=(unsigned long) mem; + while (size > 0) + { + page = kvirt_to_pa(adr); + mem_map_unreserve(virt_to_page(__va(page))); + adr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + vfree(mem); + } +} + +static int tracer_mmap_region(const char *adr, const char *start_pos, unsigned long size) +{ + unsigned long start=(unsigned long) adr; + unsigned long page,pos; + + pos=(unsigned long) start_pos; + while (size > 0) + { + page = kvirt_to_pa(pos); + if (remap_page_range(start, page, PAGE_SIZE, PAGE_SHARED)) + return -EAGAIN; + start+=PAGE_SIZE; + pos+=PAGE_SIZE; + size-=PAGE_SIZE; + } + return 0; +} +/************************************************************************************************************/ +/************************************************************************************************************/ +/************************************************************************************************************/ + +/************************************************************** + * Macro : tracer_write_to_buffer() + * Description : + * Writes data to the destination buffer and updates the + * begining the buffer write position. 
+ **************************************************************/ +#define tracer_write_to_buffer(DEST, SRC, SIZE) \ +do\ +{\ + memcpy(DEST, SRC, SIZE);\ + DEST += SIZE;\ +} while(0); + +/************************************************************** + * Function : trace() + * Description : Tracing function per se. + * Parameters : + * pmEventID, ID of event as defined in linux/trace.h + * pmEventStruct, struct describing the event + * Return values : + * 0, if everything went OK (event got registered) + * -ENODEV, no tracing daemon opened the driver. + * -ENOMEM, no more memory to store events. + * -EBUSY, tracer not started yet. + * Note : + * The kernel has to be locked here because trace() could + * be called from an interrupt handling routine and from + * a process service routine. + **************************************************************/ +int trace(uint8_t pmEventID, + void* pmEventStruct) +{ + int lVarDataLen = 0; /* Length of variable length data to be copied, if any */ + void* lVarDataBeg = NULL; /* Begining of variable length data to be copied */ + int lSendSignal = FALSE; /* Should the daemon be summoned */ + uint8_t lCPUID; /* CPUID of currently runing process */ + uint16_t lDataSize; /* Size of tracing data */ + struct siginfo lSigInfo; /* Signal information */ + struct timeval lTime; /* Event time */ + unsigned long int lFlags; /* CPU flags for lock */ + trace_time_delta lTimeDelta; /* The time elapsed between now and the last event */ + struct task_struct* pIncomingProcess = NULL; /* Pointer to incoming process */ + + /* Is there a tracing daemon */ + if(sDaemonTaskStruct == NULL) + return -ENODEV; + + /* Is this the exit of a process? 
*/ + if((pmEventID == TRACE_EV_PROCESS) && + (pmEventStruct != NULL) && + ((((trace_process*) pmEventStruct)->event_sub_id) == TRACE_EV_PROCESS_EXIT)) + trace_destroy_owners_events(current->pid); + + /* Do we trace the event */ + if((sTracerStarted == TRUE) || (pmEventID == TRACE_EV_START) || (pmEventID == TRACE_EV_BUFFER_START)) + goto TraceEvent; + + /* We can't continue */ + return -EBUSY; + +TraceEvent: + + /* Are we monitoring this event */ + if(!ltt_test_bit(pmEventID, &sTracedEvents)) + return 0; + + /* Always let the start event pass, whatever the IDs */ + if((pmEventID != TRACE_EV_START) && (pmEventID != TRACE_EV_BUFFER_START)) + { + /* Is this a scheduling change */ + if(pmEventID == TRACE_EV_SCHEDCHANGE) + { + /* Get pointer to incoming process */ + pIncomingProcess = (struct task_struct*) (((trace_schedchange*) pmEventStruct)->in); + + /* Set PID information in schedchange event */ + (((trace_schedchange*) pmEventStruct)->in) = pIncomingProcess->pid; + } + + /* Are we monitoring a particular process */ + if((sTracingPID == TRUE) && (current->pid != sTracedPID)) + { + /* Record this event if it is the scheduling change bringing in the traced PID */ + if(pIncomingProcess == NULL) + return 0; + else if (pIncomingProcess->pid != sTracedPID) + return 0; + } + + /* Are we monitoring a particular process group */ + if((sTracingPGRP == TRUE) && (current->pgrp != sTracedPGRP)) + { + /* Record this event if it is the scheduling change bringing in a process of the traced PGRP */ + if(pIncomingProcess == NULL) + return 0; + else if (pIncomingProcess->pgrp != sTracedPGRP) + return 0; + } + + /* Are we monitoring the processes of a given group of users */ + if((sTracingGID == TRUE) && (current->egid != sTracedGID)) + { + /* Record this event if it is the scheduling change bringing in a process of the traced GID */ + if(pIncomingProcess == NULL) + return 0; + else if (pIncomingProcess->egid != sTracedGID) + return 0; + } + + /* Are we monitoring the processes of a 
given user */ + if((sTracingUID == TRUE) && (current->euid != sTracedUID)) + { + /* Record this event if it is the scheduling change bringing in a process of the traced UID */ + if(pIncomingProcess == NULL) + return 0; + else if (pIncomingProcess->euid != sTracedUID) + return 0; + } + } + + /* Compute size of tracing data */ + lDataSize = sizeof(pmEventID) + sizeof(lTimeDelta) + sizeof(lDataSize); + + /* Do we log the event details */ + if(ltt_test_bit(pmEventID, &sLogEventDetailsMask)) + { + /* Update the size of the data entry */ + lDataSize += sEventStructSize[pmEventID]; + + /* Some events have variable length */ + switch(pmEventID) + { + /* Is there a file name in this */ + case TRACE_EV_FILE_SYSTEM: + if((((trace_file_system*) pmEventStruct)->event_sub_id == TRACE_EV_FILE_SYSTEM_EXEC) + || (((trace_file_system*) pmEventStruct)->event_sub_id == TRACE_EV_FILE_SYSTEM_OPEN)) + { + /* Remember the string's begining and update size variables */ + lVarDataBeg = ((trace_file_system*) pmEventStruct)->file_name; + lVarDataLen = ((trace_file_system*) pmEventStruct)->event_data2 + 1; + lDataSize += (uint16_t) lVarDataLen; + } + break; + + /* Logging of a custom event */ + case TRACE_EV_CUSTOM: + lVarDataBeg = ((trace_custom*) pmEventStruct)->data; + lVarDataLen = ((trace_custom*) pmEventStruct)->data_size; + lDataSize += (uint16_t) lVarDataLen; + break; + } + } + + /* Do we record the CPUID */ + if((sLogCPUID == TRUE) && (pmEventID != TRACE_EV_START) && (pmEventID != TRACE_EV_BUFFER_START)) + { + /* Remember the CPUID */ + lCPUID = smp_processor_id(); + + /* Update the size of the data entry */ + lDataSize += sizeof(lCPUID); + } + + /* Lock the kernel */ + spin_lock_irqsave(&sSpinLock, lFlags); + + /* The following time calculations have to be done within the spinlock because + otherwise the event order could be inverted. 
*/ + + /* Get the time of the event */ + do_gettimeofday(&lTime); + + /* Compute the time delta between this event and the time at which this buffer was started */ + lTimeDelta = (lTime.tv_sec - sBufferStartTime.tv_sec) * 1000000 + + (lTime.tv_usec - sBufferStartTime.tv_usec); + + /* Is there enough space left in the write buffer */ + if(sWritPos + lDataSize > sWritLimit) + { + /* Have we already switched buffers and informed the daemon of it */ + if(sSignalSent == TRUE) + { + /* We've lost another event */ + sEventsLost++; + + /* Bye, bye, now */ + spin_unlock_irqrestore(&sSpinLock, lFlags); + return -ENOMEM; + } + + /* We need to inform the daemon */ + lSendSignal = TRUE; + + /* Switch buffers */ + tracer_switch_buffers(lTime); + + /* Recompute the time delta since sBufferStartTime has changed because of the buffer change */ + lTimeDelta = (lTime.tv_sec - sBufferStartTime.tv_sec) * 1000000 + + (lTime.tv_usec - sBufferStartTime.tv_usec); + } + + /* Write the CPUID to the tracing buffer, if required */ + if((sLogCPUID == TRUE) && (pmEventID != TRACE_EV_START) && (pmEventID != TRACE_EV_BUFFER_START)) + tracer_write_to_buffer(sWritPos, + &lCPUID, + sizeof(lCPUID)); + + /* Write event type to tracing buffer */ + tracer_write_to_buffer(sWritPos, + &pmEventID, + sizeof(pmEventID)); + + /* Write event time delta to tracing buffer */ + tracer_write_to_buffer(sWritPos, + &lTimeDelta, + sizeof(lTimeDelta)); + + /* Do we log event details */ + if(ltt_test_bit(pmEventID, &sLogEventDetailsMask)) + { + /* Write event structure */ + tracer_write_to_buffer(sWritPos, + pmEventStruct, + sEventStructSize[pmEventID]); + + /* Write string if any */ + if(lVarDataLen) + tracer_write_to_buffer(sWritPos, + lVarDataBeg, + lVarDataLen); + } + + /* Write the length of the event description */ + tracer_write_to_buffer(sWritPos, + &lDataSize, + sizeof(lDataSize)); + + /* Should the tracing daemon be notified */ + if(lSendSignal == TRUE) + { + /* Remember that a signal has been sent */ + 
sSignalSent = TRUE; + + /* Unlock the kernel */ + spin_unlock_irqrestore(&sSpinLock, lFlags); + + /* Setup signal information */ + lSigInfo.si_signo = SIGIO; + lSigInfo.si_errno = 0; + lSigInfo.si_code = SI_KERNEL; + + /* DEBUG */ +#if 0 + printk("<1> Sending SIGIO to %d \n", sDaemonTaskStruct->pid); +#endif + + /* Signal the tracing daemon */ + send_sig_info(SIGIO, &lSigInfo, sDaemonTaskStruct); + } + else + /* Unlock the kernel */ + spin_unlock_irqrestore(&sSpinLock, lFlags); + + /* Indicate to the caller that everything is OK */ + return 0; +} + +/************************************************************* + * Function : tracer_switch_buffers() + * Description : + * Put the current write buffer to be read and reset put + * the old read buffer to be written to. Set the tracer + * variables in consequence. + * Parameters : + * pmTime, current time + * Return values : + * NONE + * Note : + * This should be called from within a spin_lock. + *************************************************************/ +void tracer_switch_buffers(struct timeval pmTime) +{ + char* lTempBuf; /* Temporary buffer pointer */ + char* lTempBufEnd; /* Temporary buffer end pointer */ + char* lInitWritPos; /* Initial write position */ + uint8_t lEventID; /* Event ID of last event */ + uint8_t lCPUID; /* CPUID of currently runing process */ + uint16_t lDataSize; /* Size of tracing data */ + uint32_t lSizeLost; /* Size delta between last event and end of buffer */ + trace_time_delta lTimeDelta; /* The time elapsed between now and the last event */ + trace_buffer_start lStartBufferEvent; /* Start of the new buffer event */ + + /* Remember initial write position */ + lInitWritPos = sWritPos; + + /* Write the end event at the write of the buffer */ + + /* Write the CPUID to the tracing buffer, if required */ + if(sLogCPUID == TRUE) + { + lCPUID = smp_processor_id(); + tracer_write_to_buffer(sWritPos, + &lCPUID, + sizeof(lCPUID)); + } + + /* Write event type to tracing buffer */ + lEventID = 
TRACE_EV_BUFFER_END; + tracer_write_to_buffer(sWritPos, + &lEventID, + sizeof(lEventID)); + + /* Write event time delta to tracing buffer */ + lTimeDelta = 0; + tracer_write_to_buffer(sWritPos, + &lTimeDelta, + sizeof(lTimeDelta)); + + /* Get size lost */ + lSizeLost = sWritBufEnd - lInitWritPos; + + /* Write size lost at the end of the buffer */ + *((uint32_t*) (sWritBufEnd - sizeof(lSizeLost))) = lSizeLost; + + /* Switch buffers */ + lTempBuf = sReadBuf; + sReadBuf = sWritBuf; + sWritBuf = lTempBuf; + + /* Set buffer ends */ + lTempBufEnd = sReadBufEnd; + sReadBufEnd = sWritBufEnd; + sWritBufEnd = lTempBufEnd; + + /* Set read limit */ + sReadLimit = sReadBufEnd; + + /* Set write limit */ + sWritLimit = sWritBufEnd - TRACER_LAST_EVENT_SIZE; + + /* Set write position */ + sWritPos = sWritBuf; + + /* Increment buffer ID */ + sBufferID++; + + /* Set the time of begining of this buffer */ + sBufferStartTime = pmTime; + + /* Write the start of buffer event */ + lStartBufferEvent.ID = sBufferID; + lStartBufferEvent.Time = pmTime; + + /* Write event type to tracing buffer */ + lEventID = TRACE_EV_BUFFER_START; + tracer_write_to_buffer(sWritPos, + &lEventID, + sizeof(lEventID)); + + /* Write event time delta to tracing buffer */ + lTimeDelta = 0; + tracer_write_to_buffer(sWritPos, + &lTimeDelta, + sizeof(lTimeDelta)); + + /* Write event structure */ + tracer_write_to_buffer(sWritPos, + &lStartBufferEvent, + sizeof(lStartBufferEvent)); + + /* Compute the data size */ + lDataSize = sizeof(lEventID) + + sizeof(lTimeDelta) + + sizeof(lStartBufferEvent) + + sizeof(lDataSize); + + /* Write the length of the event description */ + tracer_write_to_buffer(sWritPos, + &lDataSize, + sizeof(lDataSize)); +} + +/************************************************************* + * Function : tracer_ioctl() + * Description : "Ioctl" file op + * Parameters : + * pmInode, the inode associated with the device + * pmFile, file structure given to the acting process + * pmCmd, command given by 
the caller + * pmArg, arguments to the command + * Return values : + * >0, In case the caller requested the number of events + * lost. + * 0, Everything went OK + * -ENOSYS, no such command + * -EINVAL, tracer not properly configured + * -EBUSY, tracer can't be reconfigured while in operation + * -ENOMEM, no more memory + * -EFAULT, unable to access user space memory + * Note : + * In the future, this function should check to make sure + * that it's the server that make thes ioctl. + *************************************************************/ +int tracer_ioctl(struct inode* pmInode, + struct file* pmFile, + unsigned int pmCmd, + unsigned long pmArg) +{ + int lRetValue; /* Function return value */ + int lDevMinor; /* Device minor number */ + int lNewUserEventID; /* ID of newly created user event */ + trace_start lStartEvent; /* Event marking the begining of the trace */ + unsigned long int lFlags; /* CPU flags for lock */ + trace_custom lUserEvent; /* The user event to be logged */ + trace_change_mask lTraceMask; /* Event mask */ + trace_new_event lNewUserEvent; /* The event to be created for the user */ + trace_buffer_start lStartBufferEvent; /* Start of the new buffer event */ + +#if 0 + printk("Tracer: Command %d \n", pmCmd); +#endif + + /* Get device's minor number */ + lDevMinor = MINOR(pmInode->i_rdev) & 0xf; + + /* If the tracer is started, the daemon can't modify the configuration */ + if((lDevMinor == 0) + && (sTracerStarted == TRUE) && (pmCmd != TRACER_STOP) && (pmCmd != TRACER_DATA_COMITTED)) + return -EBUSY; + + /* Only some operation are permitted to user processes trying to log events */ + if((lDevMinor == 1) + && (pmCmd != TRACER_CREATE_USER_EVENT) + && (pmCmd != TRACER_DESTROY_USER_EVENT) + && (pmCmd != TRACER_TRACE_USER_EVENT) + && (pmCmd != TRACER_SET_EVENT_MASK) + && (pmCmd != TRACER_GET_EVENT_MASK)) + return -ENOSYS; + + /* Depending on the command executed */ + switch(pmCmd) + { + /* Start the tracer */ + case TRACER_START : + /* Check if the 
device has been properly set up */ + if(((sUseSyscallEIPBounds == TRUE) + &&(sSyscallEIPDepthSet == TRUE)) + ||((sUseSyscallEIPBounds == TRUE) + &&((sLowerEIPBoundSet != TRUE) + ||(sUpperEIPBoundSet != TRUE))) + ||((sTracingPID == TRUE) + &&(sTracingPGRP == TRUE))) + return -EINVAL; + + /* Set the kernel-side trace configuration */ + if(trace_set_config(trace, + sSyscallEIPDepthSet, + sUseSyscallEIPBounds, + sSyscallEIPDepth, + sLowerEIPBound, + sUpperEIPBound) < 0) + return -EINVAL; + + /* Always log the start event and the buffer start event */ + ltt_set_bit(TRACE_EV_BUFFER_START, &sTracedEvents); + ltt_set_bit(TRACE_EV_BUFFER_START, &sLogEventDetailsMask); + ltt_set_bit(TRACE_EV_START, &sTracedEvents); + ltt_set_bit(TRACE_EV_START, &sLogEventDetailsMask); + ltt_set_bit(TRACE_EV_CHANGE_MASK, &sTracedEvents); + ltt_set_bit(TRACE_EV_CHANGE_MASK, &sLogEventDetailsMask); + + /* Get the time of start */ + do_gettimeofday(&sBufferStartTime); + + /* Set the event description */ + lStartBufferEvent.ID = sBufferID; + lStartBufferEvent.Time = sBufferStartTime; + + /* Set the event description */ + lStartEvent.MagicNumber = TRACER_MAGIC_NUMBER; +#ifdef __i386__ + lStartEvent.ArchType = TRACE_ARCH_TYPE_I386; + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_NONE; +#endif +#ifdef __powerpc__ + lStartEvent.ArchType = TRACE_ARCH_TYPE_PPC; +#if defined(CONFIG_4xx) + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_PPC_4xx; +#elif defined(CONFIG_6xx) + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_PPC_6xx; +#elif defined(CONFIG_8xx) + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_PPC_8xx; +#elif defined(CONFIG_PPC_ISERIES) + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_PPC_ISERIES; +#endif +#endif +#ifdef __sh__ + lStartEvent.ArchType = TRACE_ARCH_TYPE_SH; + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_NONE; +#endif +#ifdef __s390__ + lStartEvent.ArchType = TRACE_ARCH_TYPE_S390; + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_NONE; +#endif +#ifdef __mips__ + lStartEvent.ArchType = 
TRACE_ARCH_TYPE_MIPS; + lStartEvent.ArchVariant = TRACE_ARCH_VARIANT_NONE; +#endif + lStartEvent.SystemType = TRACE_SYS_TYPE_VANILLA_LINUX; + lStartEvent.MajorVersion = TRACER_VERSION_MAJOR; + lStartEvent.MinorVersion = TRACER_VERSION_MINOR; + lStartEvent.BufferSize = sBufSize; + lStartEvent.EventMask = sTracedEvents; + lStartEvent.DetailsMask = sLogEventDetailsMask; + lStartEvent.LogCPUID = sLogCPUID; + + /* Trace the buffer start event */ + trace(TRACE_EV_BUFFER_START, &lStartBufferEvent); + + /* Trace the start event */ + trace(TRACE_EV_START, &lStartEvent); + + /* We can start tracing */ + sTracerStarted = TRUE; + + /* Reregister custom trace events created earlier */ + trace_reregister_custom_events(); + break; + + /* Stop the tracer */ + case TRACER_STOP : + /* Stop tracing */ + sTracerStarted = FALSE; + + /* Acquire the lock to avoid SMP case of where another CPU is writing a trace + while buffer is being switched */ + spin_lock_irqsave(&sSpinLock, lFlags); + + /* Switch the buffers to ensure that the end of the buffer mark is set (time isn't important) */ + tracer_switch_buffers(sBufferStartTime); + + /* Release lock */ + spin_unlock_irqrestore(&sSpinLock, lFlags); + break; + + /* Set the tracer to the default configuration */ + case TRACER_CONFIG_DEFAULT : + tracer_set_default_config(); + break; + + /* Set the memory buffers the daemon wants us to use */ + case TRACER_CONFIG_MEMORY_BUFFERS : + /* Is the given size "reasonnable" */ + if(pmArg < TRACER_MIN_BUF_SIZE) + return -EINVAL; + + /* Set the buffer's size */ + return tracer_set_buffer_size(pmArg); + break; + + /* Trace the given events */ + case TRACER_CONFIG_EVENTS : + if(copy_from_user(&sTracedEvents, (void*) pmArg, sizeof(sTracedEvents))) + return -EFAULT; + break; + + /* Record the details of the event, or not */ + case TRACER_CONFIG_DETAILS : + if(copy_from_user(&sLogEventDetailsMask, (void*) pmArg, sizeof(sLogEventDetailsMask))) + return -EFAULT; + break; + + /* Record the CPUID associated with 
the event */ + case TRACER_CONFIG_CPUID : + sLogCPUID = TRUE; + break; + + /* Trace only one process */ + case TRACER_CONFIG_PID : + sTracingPID = TRUE; + sTracedPID = pmArg; + break; + + /* Trace only the given process group */ + case TRACER_CONFIG_PGRP : + sTracingPGRP = TRUE; + sTracedPGRP = pmArg; + break; + + /* Trace the processes of a given group of users */ + case TRACER_CONFIG_GID : + sTracingGID = TRUE; + sTracedGID = pmArg; + break; + + /* Trace the processes of a given user */ + case TRACER_CONFIG_UID : + sTracingUID = TRUE; + sTracedUID = pmArg; + break; + + /* Set the call depth a which the EIP should be fetched on syscall */ + case TRACER_CONFIG_SYSCALL_EIP_DEPTH : + sSyscallEIPDepthSet = TRUE; + sSyscallEIPDepth = pmArg; + break; + + /* Set the lowerbound address from which EIP is recorded on syscall */ + case TRACER_CONFIG_SYSCALL_EIP_LOWER : + /* We are using bounds for fetching the EIP where syscall was made */ + sUseSyscallEIPBounds = TRUE; + + /* Set the lower bound */ + sLowerEIPBound = (void*) pmArg; + + /* The lower bound has been set */ + sLowerEIPBoundSet = TRUE; + break; + + /* Set the upperbound address from which EIP is recorded on syscall */ + case TRACER_CONFIG_SYSCALL_EIP_UPPER : + /* We are using bounds for fetching the EIP where syscall was made */ + sUseSyscallEIPBounds = TRUE; + + /* Set the lower bound */ + sUpperEIPBound = (void*) pmArg; + + /* The lower bound has been set */ + sUpperEIPBoundSet = TRUE; + break; + + /* The daemon has comitted the last trace */ + case TRACER_DATA_COMITTED : +#if 0 + printk("Tracer: Data has been comitted \n"); +#endif + + /* Safely set the signal sent flag to FALSE */ + spin_lock_irqsave(&sSpinLock, lFlags); + sSignalSent = FALSE; + spin_unlock_irqrestore(&sSpinLock, lFlags); + break; + + /* Get the number of events lost */ + case TRACER_GET_EVENTS_LOST : + return sEventsLost; + break; + + /* Create a user event */ + case TRACER_CREATE_USER_EVENT : + /* Copy the information from user space */ + 
if(copy_from_user(&lNewUserEvent, (void*) pmArg, sizeof(lNewUserEvent))) + return -EFAULT; + + /* Create the event */ + lNewUserEventID = trace_create_owned_event(lNewUserEvent.type, + lNewUserEvent.desc, + lNewUserEvent.format_type, + lNewUserEvent.form, + current->pid); + + /* Has the operation succeded */ + if(lNewUserEventID >= 0) + { + /* Set the event ID */ + lNewUserEvent.id = lNewUserEventID; + + /* Copy the event information back to user space */ + if(copy_to_user((void*) pmArg, &lNewUserEvent, sizeof(lNewUserEvent))) + { + /* Since we were unable to tell the user about the event, destroy it */ + trace_destroy_event(lNewUserEventID); + return -EFAULT; + } + } + else + /* Forward trace_create_event()'s error code */ + return lNewUserEventID; + break; + + /* Destroy a user event */ + case TRACER_DESTROY_USER_EVENT : + /* Pass on the user's request */ + trace_destroy_event((int) pmArg); + break; + + /* Trace a user event */ + case TRACER_TRACE_USER_EVENT : + /* Copy the information from user space */ + if(copy_from_user(&lUserEvent, (void*) pmArg, sizeof(lUserEvent))) + return -EFAULT; + + /* Copy the user event data */ + if(copy_from_user(sUserEventData, lUserEvent.data, lUserEvent.data_size)) + return -EFAULT; + + /* Log the raw event */ + lRetValue = trace_raw_event(lUserEvent.id, + lUserEvent.data_size, + sUserEventData); + + /* Has the operation failed */ + if(lRetValue < 0) + /* Forward trace_create_event()'s error code */ + return lRetValue; + break; + + /* Set event mask */ + case TRACER_SET_EVENT_MASK : + /* Copy the information from user space */ + if(copy_from_user(&(lTraceMask.mask), (void*) pmArg, sizeof(lTraceMask.mask))) + return -EFAULT; + + /* Trace the event */ + lRetValue = trace(TRACE_EV_CHANGE_MASK, &lTraceMask); + + /* Change the event mask. 
(This has to be done second or else may loose the + information if the user decides to stop logging "change mask" events) */ + memcpy(&sTracedEvents, &(lTraceMask.mask), sizeof(lTraceMask.mask)); + + /* Always trace the buffer start, the trace start and the change mask */ + ltt_set_bit(TRACE_EV_BUFFER_START, &sTracedEvents); + ltt_set_bit(TRACE_EV_START, &sTracedEvents); + ltt_set_bit(TRACE_EV_CHANGE_MASK, &sTracedEvents); + + /* Forward trace()'s error code */ + return lRetValue; + break; + + /* Get event mask */ + case TRACER_GET_EVENT_MASK : + /* Copy the information to user space */ + if(copy_to_user((void*) pmArg, &sTracedEvents, sizeof(sTracedEvents))) + return -EFAULT; + break; + + /* Unknow command */ + default : + return -ENOSYS; + } + + /* Everything went OK */ + return 0; +} + +/************************************************************* + * Function : tracer_mmap() + * Description : "Mmap" file op + * Parameters : + * pmInode, the inode associated with the device + * pmFile, file structure given to the acting process + * pmVmArea, Virtual memory area description structure + * Return values : + * 0 if ok + * -EAGAIN, when remap failed + * -EACCESS, permission denied + ************************************************************/ +int tracer_mmap(struct file* pmFile, + struct vm_area_struct* pmVmArea) +{ + int lRetValue; /* Function's return value */ + + /* Only the trace daemon is allowed access to mmap */ + if(current != sDaemonTaskStruct) + return -EACCES; + + /* Remap trace buffer into the process's memory space */ + lRetValue = tracer_mmap_region((char*) pmVmArea->vm_start, + sTracBuf, + pmVmArea->vm_end - pmVmArea->vm_start); + +#if 0 + printk("Tracer: Trace buffer virtual address => 0x%08X \n", (uint32_t)sTracBuf); + printk("Tracer: Trace buffer physical address => 0x%08X \n", (uint32_t)virt_to_phys(sTracBuf)); + printk("Tracer: Trace buffer virtual address in daemon space => 0x%08X \n", (uint32_t)pmVmArea->vm_start); + printk("Tracer: Trace 
buffer physical address in daemon space => 0x%08X \n", (uint32_t)virt_to_phys((void*)pmVmArea->vm_start));
+#endif
+
+  /* Tell the caller that the memory mapping worked OK */
+  return lRetValue;
+}
+
+/*************************************************************
+ * Function : tracer_open()
+ * Description : "Open" file op
+ * Parameters :
+ *   pmInode, the inode associated with the device
+ *   pmFile, file structure given to the acting process
+ * Return values :
+ *   0, everything went OK
+ *   -ENODEV, no such device (minor number other than 0 or 1).
+ *   -EBUSY, daemon channel (minor number 0) requested while
+ *           the device is already open.
+ * Note :
+ *   The first successful open records the caller as the trace
+ *   daemon and resets the tracer configuration; every later
+ *   open of minor 1 only bumps the open count.
+ *   NOTE(review): the first opener is recorded as the daemon
+ *   even when it opened minor 1 -- confirm this is intended.
+ ************************************************************/
+int tracer_open(struct inode* pmInode,
+                struct file*  pmFile)
+{
+  int lDevMinor = MINOR(pmInode->i_rdev) & 0xf;  /* Device minor number */
+
+  /* The masked minor is never negative, so anything above 1 is unknown */
+  if(lDevMinor > 1)
+    return -ENODEV;
+
+  if(sOpenCount == 0)
+    {
+    /* Nobody holds the device yet: remember who opened it first */
+    sDaemonTaskStruct = current;
+
+    /* Start from the default configuration, the daemon completes the setup */
+    tracer_set_default_config();
+
+#if 0
+    /* DEBUG */
+    printk("<1>Process %d opened the tracing device \n", sDaemonTaskStruct->pid);
+#endif
+    }
+  else if(lDevMinor == 0)
+    {
+    /* The daemon's channel cannot be shared */
+    return -EBUSY;
+    }
+
+  /* One more user of the device */
+  sOpenCount++;
+
+#ifdef MODULE
+  /* Keep the module loaded while the device is open */
+  MOD_INC_USE_COUNT;
+#endif
+
+  return 0;
+}
+
+/*************************************************************
+ * Function : tracer_release()
+ * Description : "Release" file op
+ * Parameters :
+ *   pmInode, the inode associated with the device
+ *   pmFile, file structure given to the acting 
process
+ * Return values :
+ *   0, everything went OK
+ * Note :
+ *   It is assumed that if the tracing daemon dies, exits
+ *   or simply stops existing, the kernel or "someone" will
+ *   call tracer_release. Otherwise, we're in trouble ...
+ *   Closing minor 0 tears the tracer state down; closing any
+ *   other minor only drops the open count.
+ *************************************************************/
+int tracer_release(struct inode* pmInode,
+                   struct file*  pmFile)
+{
+  int lDevMinor = MINOR(pmInode->i_rdev) & 0xf;  /* Device minor number */
+
+  /* Only the daemon's channel owns the tracer state */
+  if(lDevMinor == 0)
+    {
+    /* Report events that could not be recorded */
+    if(sEventsLost > 0)
+      printk(KERN_ALERT "Tracer: Lost %d events \n", sEventsLost);
+
+    /* There is no daemon anymore */
+    sDaemonTaskStruct = NULL;
+
+    /* Give the trace buffer back, if one was allocated */
+    if(sTracBuf != NULL)
+      rvfree(sTracBuf, sAllocSize);
+
+    /* Forget every pointer into the buffer */
+    sTracBuf    = NULL;
+    sWritBuf    = NULL;
+    sReadBuf    = NULL;
+    sWritBufEnd = NULL;
+    sReadBufEnd = NULL;
+    sWritPos    = NULL;
+    sReadLimit  = NULL;
+    sWritLimit  = NULL;
+
+    /* Back to the default configuration, with tracing stopped.
+       NOTE(review): the buffer is freed before sTracerStarted is
+       cleared -- confirm trace() cannot run in between. */
+    tracer_set_default_config();
+    sTracerStarted = FALSE;
+
+    /* Clear the read accounting and the lost-event counter */
+    sBufReadComplete    = 0;
+    sSizeReadIncomplete = 0;
+    sEventsLost         = 0;
+
+    /* No signal is pending for the daemon */
+    sSignalSent = FALSE;
+    }
+
+  /* One user less */
+  sOpenCount--;
+
+#ifdef MODULE
+  /* Decrement module usage */
+  MOD_DEC_USE_COUNT;
+#endif
+
+  return 0;
+}
+
+/*************************************************************
+ * Function : tracer_fsync()
+ * Description : "Fsync" file op
+ * Parameters :
+ *   pmFile, file structure given to the acting process
+ *   pmDEntry, dentry associated with file
+ * Return values :
+ *   0, everything went OK
+ *   -EACCES, permission denied
+ * Note :
+ *   We need to lock the modifications of the values because
+ *   they are read and written by trace().
+ *   Sonia: do not forget me, I am still yours....
+ *************************************************************/
+int tracer_fsync(struct file*   pmFile,
+                 struct dentry* pmDEntry,
+                 int            pmDataSync)
+{
+  unsigned long int lFlags;  /* CPU flags for lock */
+
+  /* Only the trace daemon is allowed access to fsync */
+  if(current != sDaemonTaskStruct)
+    return -EACCES;
+
+  /* Take the tracer spin lock with interrupts disabled */
+  spin_lock_irqsave(&sSpinLock, lFlags);
+
+  /* Reset the write positions */
+  sWritPos = sWritBuf;
+
+  /* Reset read limit */
+  sReadLimit = sReadBuf;
+
+  /* Reset bytes recorded */
+  sBufReadComplete = 0;
+  sSizeReadIncomplete = 0;
+  sEventsLost = 0;
+
+  /* Reset signal sent */
+  sSignalSent = FALSE;
+
+  /* Release the tracer spin lock */
+  spin_unlock_irqrestore(&sSpinLock, lFlags);
+
+  /* Tell the caller that everything is OK */
+  return 0;
+}
+
+/*************************************************************
+ * Function : tracer_set_buffer_size()
+ * Description :
+ *   Sets the size of the buffers containing the trace data.
+ *   Any previously allocated buffer is released first, then a
+ *   region of twice pmSize (rounded by FIX_SIZE) is allocated
+ *   and split into a write half and a read half.
+ * Parameters :
+ *   pmSize, Size of buffers
+ * Return values :
+ *   0, Size setting went OK
+ *   -ENOMEM, unable to get a hold of memory for tracer; in that
+ *            case no buffer is left and all buffer pointers are
+ *            NULL, as after tracer_release()
+ *************************************************************/
+int tracer_set_buffer_size(int pmSize)
+{
+  int lSizeAlloc;  /* Size to be allocated */
+
+  /* Set size to allocate (= pmSize * 2) and fix its size to be on a page boundary */
+  lSizeAlloc = FIX_SIZE(pmSize << 1);
+
+  /* Free the current buffers, if any */
+  if(sTracBuf != NULL)
+    rvfree(sTracBuf, sAllocSize);
+
+  /* Allocate space for the tracing buffers */
+  if((sTracBuf = (char*) rvmalloc(lSizeAlloc)) == NULL)
+    {
+    /* The old buffer has already been freed: do not leave any
+       pointer dangling into it */
+    sWritBuf    = NULL;
+    sReadBuf    = NULL;
+    sWritBufEnd = NULL;
+    sReadBufEnd = NULL;
+    sWritPos    = NULL;
+    sReadLimit  = NULL;
+    sWritLimit  = NULL;
+    return -ENOMEM;
+    }
+
+  /* Remember the size set */
+  sBufSize = pmSize;
+  sAllocSize = lSizeAlloc;
+
+  /* Set the read and write buffers */
+  sWritBuf = sTracBuf;
+  sReadBuf = sTracBuf + sBufSize;
+
+  /* Set end of buffers */
+  sWritBufEnd = sWritBuf + sBufSize;
+  sReadBufEnd = sReadBuf + sBufSize;
+
+  /* Set write position */
+  sWritPos = sWritBuf;
+
+  /* Set read limit */
+  sReadLimit = sReadBuf;
+
+  /* Set write limit */
+  sWritLimit = sWritBufEnd - TRACER_LAST_EVENT_SIZE;
+
+  /* All is OK */
+  return 0;
+}
+
+/*************************************************************
+ * Function : tracer_set_default_config()
+ * Description : Sets the tracer in its default config: every
+ *               event up to TRACE_EV_MAX is traced with its
+ *               details, and no CPUID logging, PID/PGRP/GID/UID
+ *               filtering or syscall EIP depth/bounds is active.
+ * Parameters :
+ *   NONE
+ * Return values :
+ *   0, always (nothing in this function can fail)
+ *************************************************************/
+int tracer_set_default_config(void)
+{
+  int lEventID;  /* Event currently being enabled */
+
+  /* Start from an empty event mask */
+  sTracedEvents = 0;
+
+  /* Enable every known event, together with its details */
+  lEventID = 0;
+  while(lEventID <= TRACE_EV_MAX)
+    {
+    ltt_set_bit(lEventID, &sTracedEvents);
+    ltt_set_bit(lEventID, &sLogEventDetailsMask);
+    lEventID++;
+    }
+
+  /* The CPUID isn't logged */
+  sLogCPUID = FALSE;
+
+  /* No filtering on process, process group, group or user */
+  sTracingPID  = FALSE;
+  sTracingPGRP = FALSE;
+  sTracingGID  = FALSE;
+  sTracingUID  = FALSE;
+
+  /* No particular call depth for syscall EIP fetching */
+  sSyscallEIPDepthSet = FALSE;
+
+  /* No address bounds for syscall EIP fetching */
+  sUseSyscallEIPBounds = FALSE;
+  sLowerEIPBoundSet    = FALSE;
+  sUpperEIPBoundSet    = FALSE;
+
+  /* Hand the basic configuration to the kernel trace facility
+     (its return value is not examined) */
+  trace_set_config(trace,
+                   sSyscallEIPDepthSet,
+                   sUseSyscallEIPBounds,
+                   0,
+                   0,
+                   0);
+
+  return 0;
+}
+
+/**************************************************************
+ * Function : tracer_init()
+ * Description : Tracer initialization function.
+ * Parameters :
+ *   NONE
+ * Return values :
+ *   0, everything went OK
+ *   -ENOMEM, incapable of allocating necessary memory
+ *   Forwarded error code otherwise (from
+ *   tracer_set_default_config(), register_chrdev() or
+ *   register_tracer())
+ * Note :
+ *   On failure everything acquired so far is released again, so
+ *   that no char device or allocation outlives a failed init.
+ **************************************************************/
+int __init tracer_init(void)
+{
+  int lError = 0;  /* Error, if any */
+  int lMajor;      /* Result of register_chrdev() */
+
+  /* Initialize configuration */
+  if((lError = tracer_set_default_config()) < 0)
+    return lError;
+
+  /* Initialize open count */
+  sOpenCount = 0;
+
+  /* Initialize tracer lock */
+  sTracLock = 0;
+
+  /* Initialize signal sent */
+  sSignalSent = FALSE;
+
+  /* Initialize bytes read and events lost */
+  sBufReadComplete = 0;
+  sSizeReadIncomplete = 0;
+  sEventsLost = 0;
+
+  /* Initialize buffer ID */
+  sBufferID = 0;
+
+  /* Initialize tracing daemon task structure */
+  sDaemonTaskStruct = NULL;
+
+  /* Allocate memory for large data components.
+     vmalloc() signals failure with NULL, never with a negative value */
+  if((sUserEventData = vmalloc(CUSTOM_EVENT_MAX_SIZE)) == NULL)
+    return -ENOMEM;
+
+  /* Initialize spin lock */
+  sSpinLock = SPIN_LOCK_UNLOCKED;
+
+  /* Register the tracer as a char device (dynamic major) */
+  lMajor = register_chrdev(0, TRACER_NAME, &sTracerFileOps);
+  if(lMajor < 0)
+    {
+    printk(KERN_ALERT "Tracer: Unable to register tracer as a char device \n");
+    lError = lMajor;
+    goto FreeUserEventData;
+    }
+  sMajorNumber = lMajor;
+
+  /* Register the tracer with the kernel */
+  if((lError = register_tracer(trace)) < 0)
+    {
+    /* Tell the user about the problem */
+    printk(KERN_ALERT "Tracer: Unable to register tracer with kernel, tracer disabled \n");
+
+    /* Make sure no one can open this device */
+    sOpenCount = 1;
+    unregister_chrdev(sMajorNumber, TRACER_NAME);
+    goto FreeUserEventData;
+    }
+
+  printk(KERN_INFO "Tracer: Initialization complete \n");
+
+  /* Return register_tracer()'s (non-negative) result */
+  return lError;
+
+FreeUserEventData:
+  /* Undo the allocation made above */
+  vfree(sUserEventData);
+  sUserEventData = NULL;
+  return lError;
+}
+
+/* Is this loaded as a module */
+#ifdef MODULE
+/**************************************************************
+ * Function : tracer_exit()
+ * Description : Cleanup of the tracer.
+ * Parameters : NONE
+ * Return values : NONE
+ * Note : The order of the unregistrations is important. First,
+ *        rule out any possibility of getting more trace
+ *        data. Second, rule out any possibility of being read
+ *        by the tracing daemon. Last, free the tracing
+ *        buffer.
+ **************************************************************/
+void tracer_exit(void)
+{
+  /* Unregister the tracer from the kernel */
+  unregister_tracer(trace);
+
+  /* Unregister the tracer from being a char device */
+  unregister_chrdev(sMajorNumber, TRACER_NAME);
+
+  /* Free the current buffers, if any */
+  if(sTracBuf != NULL)
+    rvfree(sTracBuf, sAllocSize);
+
+  /* Paranoia */
+  sTracBuf = NULL;
+
+  /* Release the user event area obtained with vmalloc() in tracer_init() */
+  if(sUserEventData != NULL)
+    vfree(sUserEventData);
+  sUserEventData = NULL;
+}
+module_exit(tracer_exit);
+#endif /* MODULE */
+
+module_init(tracer_init);
diff -urNp linux-2.4.7.SuSE-orig/drivers/trace/tracer.h linux-2.4.7.SuSE/drivers/trace/tracer.h
--- linux-2.4.7.SuSE-orig/drivers/trace/tracer.h Wed Dec 31 19:00:00 1969
+++ linux-2.4.7.SuSE/drivers/trace/tracer.h Mon Jun 24 11:52:48 2002
@@ -0,0 +1,275 @@
+/*
+ * drivers/trace/tracer.h
+ *
+ * Copyright (C) 1999, 2000, 2001 Karim Yaghmour (karym@opersys.com)
+ * Portions contributed by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation
+ *
+ * This contains the necessary definitions for the system tracer
+ */
+
+#ifndef _TRACER_H
+#define _TRACER_H
+
+/* Logic values */
+#define FALSE 0
+#define TRUE  1
+
+/* Structure packing within the trace.
+   The attribute must not end in ';' : the macro is placed between the
+   closing brace and the name of a typedef. */
+#ifndef LTT_PACKED_STRUCT
+#if LTT_UNPACKED_STRUCTS
+#define LTT_PACKED_STRUCT
+#else  /* if LTT_UNPACKED_STRUCTS */
+#define LTT_PACKED_STRUCT __attribute__ ((packed))
+#endif /* if LTT_UNPACKED_STRUCTS */
+#endif /* ifndef LTT_PACKED_STRUCT */
+
+/* Tracer properties */
+#define TRACER_NAME "tracer" /* Name of the device as seen in /proc/devices */
+
+/* Tracer buffer information */
+#define TRACER_DEFAULT_BUF_SIZE 50000 /* Default size of tracing buffer */
+#define TRACER_MIN_BUF_SIZE 1000 /* Minimum size of tracing buffer */
+#define TRACER_MAX_BUF_SIZE 500000 /* Maximum size of tracing buffer */
+
+/* Local definitions */
+typedef uint32_t trace_time_delta; /* The type used to store the time delta between events */
+
+/* Number of bytes set aside for last event */
+#define TRACER_LAST_EVENT_SIZE (sizeof(uint8_t) + sizeof(uint8_t) + 
sizeof(trace_time_delta) + sizeof(uint32_t)) + +/* Architecture types */ +#define TRACE_ARCH_TYPE_I386 1 /* i386 system */ +#define TRACE_ARCH_TYPE_PPC 2 /* PPC system */ +#define TRACE_ARCH_TYPE_SH 3 /* SH system */ +#define TRACE_ARCH_TYPE_S390 4 /* S/390 system */ +#define TRACE_ARCH_TYPE_MIPS 5 /* MIPS system */ + +/* Standard definitions for variants */ +#define TRACE_ARCH_VARIANT_NONE 0 /* Main architecture implementation */ + +/* PowerPC variants */ +#define TRACE_ARCH_VARIANT_PPC_4xx 1 /* 4xx systems (IBM embedded series) */ +#define TRACE_ARCH_VARIANT_PPC_6xx 2 /* 6xx/7xx/74xx/8260/POWER3 systems (desktop flavor) */ +#define TRACE_ARCH_VARIANT_PPC_8xx 3 /* 8xx system (Motoral embedded series) */ +#define TRACE_ARCH_VARIANT_PPC_ISERIES 4 /* 8xx system (iSeries) */ + +/* System types */ +#define TRACE_SYS_TYPE_VANILLA_LINUX 1 /* Vanilla linux kernel */ + +/* The information logged when the tracing is started */ +#define TRACER_MAGIC_NUMBER 0x00D6B7ED /* That day marks an important historical event ... 
*/ +#define TRACER_VERSION_MAJOR 1 /* Major version number */ +#define TRACER_VERSION_MINOR 14 /* Minor version number */ +typedef struct _trace_start +{ + uint32_t MagicNumber; /* Magic number to identify a trace */ + uint32_t ArchType; /* Type of architecture */ + uint32_t ArchVariant; /* Variant of the given type of architecture */ + uint32_t SystemType; /* Operating system type */ + uint8_t MajorVersion; /* Major version of trace */ + uint8_t MinorVersion; /* Minor version of trace */ + + uint32_t BufferSize; /* Size of buffers */ + trace_event_mask EventMask; /* The event mask */ + trace_event_mask DetailsMask; /* Are the event details logged */ + uint8_t LogCPUID; /* Is the CPUID logged */ +} LTT_PACKED_STRUCT trace_start; + +/* Start and end of trace buffer information */ +typedef struct _trace_buffer_start +{ + struct timeval Time; /* Time stamp of this buffer */ + uint32_t ID; /* Unique buffer ID */ +} LTT_PACKED_STRUCT trace_buffer_start; + +/* The configurations possible */ +#define TRACER_START TRACER_MAGIC_NUMBER + 0 /* Start tracing events using the current configuration */ +#define TRACER_STOP TRACER_MAGIC_NUMBER + 1 /* Stop tracing */ +#define TRACER_CONFIG_DEFAULT TRACER_MAGIC_NUMBER + 2 /* Set the tracer to the default configuration */ +#define TRACER_CONFIG_MEMORY_BUFFERS TRACER_MAGIC_NUMBER + 3 /* Set the memory buffers the daemon wants us to use */ +#define TRACER_CONFIG_EVENTS TRACER_MAGIC_NUMBER + 4 /* Trace the given events */ +#define TRACER_CONFIG_DETAILS TRACER_MAGIC_NUMBER + 5 /* Record the details of the event, or not */ +#define TRACER_CONFIG_CPUID TRACER_MAGIC_NUMBER + 6 /* Record the CPUID associated with the event */ +#define TRACER_CONFIG_PID TRACER_MAGIC_NUMBER + 7 /* Trace only one process */ +#define TRACER_CONFIG_PGRP TRACER_MAGIC_NUMBER + 8 /* Trace only the given process group */ +#define TRACER_CONFIG_GID TRACER_MAGIC_NUMBER + 9 /* Trace the processes of a given group of users */ +#define TRACER_CONFIG_UID 
TRACER_MAGIC_NUMBER + 10 /* Trace the processes of a given user */ +#define TRACER_CONFIG_SYSCALL_EIP_DEPTH TRACER_MAGIC_NUMBER + 11 /* Set the call depth at which the EIP should be fetched on syscall */ +#define TRACER_CONFIG_SYSCALL_EIP_LOWER TRACER_MAGIC_NUMBER + 12 /* Set the lowerbound address from which EIP is recorded on syscall */ +#define TRACER_CONFIG_SYSCALL_EIP_UPPER TRACER_MAGIC_NUMBER + 13 /* Set the upperbound address from which EIP is recorded on syscall */ +#define TRACER_DATA_COMITTED TRACER_MAGIC_NUMBER + 14 /* The daemon has comitted the last trace */ +#define TRACER_GET_EVENTS_LOST TRACER_MAGIC_NUMBER + 15 /* Get the number of events lost */ +#define TRACER_CREATE_USER_EVENT TRACER_MAGIC_NUMBER + 16 /* Create a user tracable event */ +#define TRACER_DESTROY_USER_EVENT TRACER_MAGIC_NUMBER + 17 /* Destroy a user tracable event */ +#define TRACER_TRACE_USER_EVENT TRACER_MAGIC_NUMBER + 18 /* Trace a user event */ +#define TRACER_SET_EVENT_MASK TRACER_MAGIC_NUMBER + 19 /* Set the trace event mask */ +#define TRACER_GET_EVENT_MASK TRACER_MAGIC_NUMBER + 20 /* Get the trace event mask */ + +#ifdef __powerpc__ +/* We need to replace the usual PPC kernel bit manipulation functions with + * equivalent functions that are cross-platform compatible. The PPC kernel + * functions define bit order as follows: + * + * bit 0: 0x0000000100000000 + * bit 1: 0x0000000200000000 + * . + * . + * . + * bit 7: 0x0000008000000000 + * bit 8: 0x0000010000000000 + * bit 9: 0x0000020000000000 + * . + * . + * . + * bit 31: 0x8000000000000000 + * bit 32: 0x0000000000000001 + * bit 33: 0x0000000000000002 + * . + * . + * . + * bit 63: 0x0000000080000000 + * + * Our redefined functions define bit order the same as the kernel bit functions + * for x86 targets: + * + * bit 0: 0x0100000000000000 + * bit 1: 0x0200000000000000 + * . + * . + * . + * bit 7: 0x8000000000000000 + * bit 8: 0x0001000000000000 + * bit 9: 0x0002000000000000 + * . + * . + * . 
+ * bit 31: 0x0000008000000000
+ * bit 32: 0x0000000001000000
+ * bit 33: 0x0000000002000000
+ *   .
+ *   .
+ *   .
+ * bit 63: 0x0000000000000080
+ */
+
+/* Atomically set bit nr (x86 bit order, see above) in the bit field at
+ * addr, using a lwarx/stwcx. retry loop.
+ * Returns non-zero if the bit was already set, 0 otherwise.
+ * NOTE(review): the word index is computed on "unsigned long" while
+ * ltt_test_bit() uses "unsigned int"; they only agree where long is
+ * 32 bits wide -- confirm no 64-bit PowerPC build uses this header. */
+static inline int ltt_set_bit(int nr, volatile void * addr)
+{
+	unsigned long old, t;
+	/* The shift count reaches 31, so the constant must be unsigned */
+	unsigned long mask = 1UL << (24 - (nr & 0x18) + (nr & 0x7));
+	volatile unsigned long *p = ((volatile unsigned long *)addr) + (nr >> 5);
+
+	/* stwcx. writes CR0 and the sequence modifies *p: tell the compiler */
+	__asm__ __volatile__(
+		"1:lwarx %0,0,%3 \n\t"
+		"or %1,%0,%2 \n\t"
+		"stwcx. %1,0,%3 \n\t"
+		"bne 1b \n\t"
+		: "=&r" (old), "=&r" (t)
+		: "r" (mask), "r" (p)
+		: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+/* Atomically clear bit nr (x86 bit order, see above) in the bit field
+ * at addr, using a lwarx/stwcx. retry loop.
+ * Returns non-zero if the bit was set before the call, 0 otherwise. */
+static inline int ltt_clear_bit(unsigned long nr, volatile void *addr)
+{
+	unsigned long old, t;
+	/* The shift count reaches 31, so the constant must be unsigned */
+	unsigned long mask = 1UL << (24 - (nr & 0x18) + (nr & 0x7));
+	volatile unsigned long *p = ((volatile unsigned long *)addr) + (nr >> 5);
+
+	/* Same template layout and clobbers as ltt_set_bit(), andc instead of or */
+	__asm__ __volatile__(
+		"1:lwarx %0,0,%3 \n\t"
+		"andc %1,%0,%2 \n\t"
+		"stwcx. %1,0,%3 \n\t"
+		"bne 1b \n\t"
+		: "=&r" (old), "=&r" (t)
+		: "r" (mask), "r" (p)
+		: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+/* Return non-zero if bit nr (x86 bit order, see above) is set in the
+ * bit field at addr, 0 otherwise. Plain read, nothing is modified. */
+static inline int ltt_test_bit(int nr, __const__ volatile void *addr)
+{
+	__const__ volatile unsigned int *p = (__const__ volatile unsigned int *) addr;
+
+	return ((p[nr >> 5] >> (24 - (nr & 0x18) + (nr & 0x7))) & 1) != 0;
+}
+#else /* ifdef __powerpc__ */
+#ifdef __s390__ /* Added by T.H. */
+/* s390 bitops similar to powerpc.
Will use functions taken from LTTTypes.h */
+
+/* Set bit nr in the byte-addressed bit field at addr (bit nr lives in
+ * byte nr/8, at position nr%8 counted from the least significant bit).
+ * Returns non-zero if the bit was already set, 0 otherwise.
+ * NOTE(review): this is a plain read-modify-write, not an atomic
+ * operation -- confirm callers do not rely on atomicity.
+ * "static" (as for the powerpc variants) so that a local copy exists
+ * whenever the compiler decides not to inline. */
+static __inline__ int ltt_set_bit(int nr, void * addr)
+{
+	unsigned char *p = addr;
+	unsigned char mask = 1 << (nr&7);
+	unsigned char old;
+
+	p += nr>>3;
+	old = *p;
+	*p |= mask;
+
+	return ((old & mask) != 0);
+}
+
+/* Clear bit nr in the byte-addressed bit field at addr.
+ * Returns non-zero if the bit was set before the call, 0 otherwise.
+ * Not atomic, see ltt_set_bit(). */
+static __inline__ int ltt_clear_bit(int nr, void * addr)
+{
+	unsigned char *p = addr;
+	unsigned char mask = 1 << (nr&7);
+	unsigned char old;
+
+	p += nr>>3;
+	old = *p;
+	*p &= ~mask;
+
+	return ((old & mask) != 0);
+}
+
+/* Return non-zero if bit nr is set in the byte-addressed bit field at
+ * addr, 0 otherwise. Nothing is modified. */
+static __inline__ int ltt_test_bit(int nr,void *addr)
+{
+	unsigned char *p = addr;
+	unsigned char mask = 1 << (nr&7);
+
+	p += nr>>3;
+
+	return ((*p & mask) != 0);
+}
+#else /* For non-powerpc and non-s390 processors we can use the kernel functions. */
+#define ltt_set_bit    set_bit
+#define ltt_clear_bit  clear_bit
+#define ltt_test_bit   test_bit
+#endif /* ifdef __s390__ */
+#endif /* ifdef __powerpc__ */
+
+/* Function prototypes */
+int trace
+  (uint8_t,
+   void*);
+void tracer_switch_buffers
+  (struct timeval);
+int tracer_ioctl
+  (struct inode*,
+   struct file*,
+   unsigned int,
+   unsigned long);
+int tracer_mmap
+  (struct file*,
+   struct vm_area_struct*);
+int tracer_open
+  (struct inode*,
+   struct file*);
+int tracer_release
+  (struct inode*,
+   struct file*);
+int tracer_fsync
+  (struct file*,
+   struct dentry*,
+   int);
+#ifdef MODULE
+void tracer_exit
+  (void);
+#endif
+int tracer_set_buffer_size
+  (int);
+int tracer_set_default_config
+  (void);
+int tracer_init
+  (void);
+#endif /* _TRACER_H */
diff -urNp linux-2.4.7.SuSE-orig/fs/buffer.c linux-2.4.7.SuSE/fs/buffer.c
--- linux-2.4.7.SuSE-orig/fs/buffer.c Tue Oct 30 16:43:44 2001
+++ linux-2.4.7.SuSE/fs/buffer.c Mon Jun 24 11:52:48 2002
@@ -47,6 +47,8 @@ #include #include +#include + #include #include #include
@@ -159,6 +161,7 @@ void __wait_on_buffer(struct buffer_head get_bh(bh); add_wait_queue(&bh->b_wait, &wait); do { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_BUF_WAIT_START, 0, 0, NULL); run_task_queue(&tq_disk); 
set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!buffer_locked(bh)) @@ -166,6 +169,7 @@ void __wait_on_buffer(struct buffer_head schedule(); } while (buffer_locked(bh)); tsk->state = TASK_RUNNING; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_BUF_WAIT_END, 0, 0, NULL); remove_wait_queue(&bh->b_wait, &wait); put_bh(bh); } diff -urNp linux-2.4.7.SuSE-orig/fs/exec.c linux-2.4.7.SuSE/fs/exec.c --- linux-2.4.7.SuSE-orig/fs/exec.c Tue Oct 30 16:43:44 2001 +++ linux-2.4.7.SuSE/fs/exec.c Mon Jun 24 11:52:48 2002 @@ -37,6 +37,8 @@ #define __NO_VERSION__ #include +#include + #include #include #include @@ -853,6 +855,11 @@ int do_execve(char * filename, char ** a retval = PTR_ERR(file); if (IS_ERR(file)) return retval; + + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_EXEC, + 0, + file->f_dentry->d_name.len, + file->f_dentry->d_name.name); bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); diff -urNp linux-2.4.7.SuSE-orig/fs/ioctl.c linux-2.4.7.SuSE/fs/ioctl.c --- linux-2.4.7.SuSE-orig/fs/ioctl.c Fri Feb 9 14:29:44 2001 +++ linux-2.4.7.SuSE/fs/ioctl.c Mon Jun 24 11:52:48 2002 @@ -8,6 +8,8 @@ #include #include +#include + #include #include @@ -56,6 +58,10 @@ asmlinkage long sys_ioctl(unsigned int f if (!filp) goto out; error = 0; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_IOCTL, + fd, + cmd, + NULL); lock_kernel(); switch (cmd) { case FIOCLEX: diff -urNp linux-2.4.7.SuSE-orig/fs/open.c linux-2.4.7.SuSE/fs/open.c --- linux-2.4.7.SuSE-orig/fs/open.c Tue Oct 30 16:43:44 2001 +++ linux-2.4.7.SuSE/fs/open.c Mon Jun 24 11:52:48 2002 @@ -16,6 +16,8 @@ #include #include +#include + #include #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) @@ -768,6 +770,10 @@ asmlinkage long sys_open(const char * fi error = PTR_ERR(f); if (IS_ERR(f)) goto out_error; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_OPEN, + fd, + f->f_dentry->d_name.len, + f->f_dentry->d_name.name); fd_install(fd, f); } out: @@ -834,6 +840,10 @@ asmlinkage long 
sys_close(unsigned int f filp = files->fd[fd]; if (!filp) goto out_unlock; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_CLOSE, + fd, + 0, + NULL); files->fd[fd] = NULL; FD_CLR(fd, files->close_on_exec); __put_unused_fd(files, fd); diff -urNp linux-2.4.7.SuSE-orig/fs/read_write.c linux-2.4.7.SuSE/fs/read_write.c --- linux-2.4.7.SuSE-orig/fs/read_write.c Tue Apr 17 17:36:44 2001 +++ linux-2.4.7.SuSE/fs/read_write.c Mon Jun 24 11:52:48 2002 @@ -12,6 +12,8 @@ #include #include +#include + #include struct file_operations generic_ro_fops = { @@ -77,6 +79,10 @@ asmlinkage off_t sys_lseek(unsigned int if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_SEEK, + fd, + offset, + NULL); fput(file); bad: return retval; @@ -102,6 +108,11 @@ asmlinkage long sys_llseek(unsigned int offset = llseek(file, ((loff_t) offset_high << 32) | offset_low, origin); + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_SEEK, + fd, + offset, + NULL); + retval = (int)offset; if (offset >= 0) { retval = -EFAULT; @@ -129,8 +140,13 @@ asmlinkage ssize_t sys_read(unsigned int if (!ret) { ssize_t (*read)(struct file *, char *, size_t, loff_t *); ret = -EINVAL; - if (file->f_op && (read = file->f_op->read) != NULL) + if (file->f_op && (read = file->f_op->read) != NULL) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_READ, + fd, + count, + NULL); ret = read(file, buf, count, &file->f_pos); + } } } if (ret > 0) @@ -156,8 +172,13 @@ asmlinkage ssize_t sys_write(unsigned in if (!ret) { ssize_t (*write)(struct file *, const char *, size_t, loff_t *); ret = -EINVAL; - if (file->f_op && (write = file->f_op->write) != NULL) + if (file->f_op && (write = file->f_op->write) != NULL) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_WRITE, + fd, + count, + NULL); ret = write(file, buf, count, &file->f_pos); + } } } if (ret > 0) @@ -282,6 +303,10 @@ asmlinkage ssize_t sys_readv(unsigned lo file = fget(fd); if (!file) goto bad_file; + 
TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_READ, + fd, + count, + NULL); if (file->f_op && (file->f_mode & FMODE_READ) && (file->f_op->readv || file->f_op->read)) ret = do_readv_writev(VERIFY_WRITE, file, vector, count); @@ -302,6 +327,10 @@ asmlinkage ssize_t sys_writev(unsigned l file = fget(fd); if (!file) goto bad_file; + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_WRITE, + fd, + count, + NULL); if (file->f_op && (file->f_mode & FMODE_WRITE) && (file->f_op->writev || file->f_op->write)) ret = do_readv_writev(VERIFY_READ, file, vector, count); @@ -337,6 +366,12 @@ asmlinkage ssize_t sys_pread(unsigned in goto out; if (pos < 0) goto out; + + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_READ, + fd, + count, + NULL); + ret = read(file, buf, count, &pos); if (ret > 0) inode_dir_notify(file->f_dentry->d_parent->d_inode, DN_ACCESS); @@ -368,6 +403,11 @@ asmlinkage ssize_t sys_pwrite(unsigned i goto out; if (pos < 0) goto out; + + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_WRITE, + fd, + count, + NULL); ret = write(file, buf, count, &pos); if (ret > 0) diff -urNp linux-2.4.7.SuSE-orig/fs/select.c linux-2.4.7.SuSE/fs/select.c --- linux-2.4.7.SuSE-orig/fs/select.c Tue Oct 30 16:43:35 2001 +++ linux-2.4.7.SuSE/fs/select.c Mon Jun 24 11:52:48 2002 @@ -19,6 +19,8 @@ #include #include +#include + #include #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) @@ -192,6 +194,10 @@ int do_select(int n, fd_set_bits *fds, l file = fget(i); mask = POLLNVAL; if (file) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_SELECT, + i /* The fd*/, + __timeout, + NULL); mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, wait); @@ -366,6 +372,10 @@ static void do_pollfd(unsigned int num, struct file * file = fget(fd); mask = POLLNVAL; if (file != NULL) { + TRACE_FILE_SYSTEM(TRACE_EV_FILE_SYSTEM_POLL, + fd, + 0, + NULL); mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, *pwait); diff -urNp linux-2.4.7.SuSE-orig/include/linux/trace.h 
linux-2.4.7.SuSE/include/linux/trace.h --- linux-2.4.7.SuSE-orig/include/linux/trace.h Wed Dec 31 19:00:00 1969 +++ linux-2.4.7.SuSE/include/linux/trace.h Mon Jun 24 11:52:48 2002 @@ -0,0 +1,432 @@ +/* + * linux/include/linux/trace.h + * + * Copyright (C) 1999, Karim Yaghmour + * + * This contains the necessary definitions for tracing the + * the system. + */ + +#ifndef _LINUX_TRACE_H +#define _LINUX_TRACE_H + +#include +#include + +/* Is kernel tracing enabled */ +#if defined(CONFIG_TRACE) || defined(CONFIG_TRACE_MODULE) + +/* Structure packing within the trace */ +#if LTT_UNPACKED_STRUCTS +#define LTT_PACKED_STRUCT +#else /* if LTT_UNPACKED_STRUCTS */ +#define LTT_PACKED_STRUCT __attribute__ ((packed)) +#endif /* if LTT_UNPACKED_STRUCTS */ + +/* The prototype of the tracer call (EventID, *EventStruct) */ +typedef int (*tracer_call) (uint8_t, void*); + +/* This structure contains all the information needed to be known + about the tracing module. */ +struct tracer +{ + /* The tracing routine itself */ + tracer_call trace; + + /* Fetch of eip origin of syscall */ + int fetch_syscall_eip_use_depth; /* Use the given depth */ + int fetch_syscall_eip_use_bounds; /* Find eip in bounds */ + int syscall_eip_depth; /* Call depth at which eip is fetched */ + void* syscall_lower_eip_bound; /* Lower eip bound */ + void* syscall_upper_eip_bound; /* Higher eip bound */ +}; + +/* Maximal size a custom event can have */ +#define CUSTOM_EVENT_MAX_SIZE 8192 + +/* String length limits for custom events creation */ +#define CUSTOM_EVENT_TYPE_STR_LEN 20 +#define CUSTOM_EVENT_DESC_STR_LEN 100 +#define CUSTOM_EVENT_FORM_STR_LEN 256 +#define CUSTOM_EVENT_FINAL_STR_LEN 200 + +/* Type of custom event formats */ +#define CUSTOM_EVENT_FORMAT_TYPE_NONE 0 +#define CUSTOM_EVENT_FORMAT_TYPE_STR 1 +#define CUSTOM_EVENT_FORMAT_TYPE_HEX 2 +#define CUSTOM_EVENT_FORMAT_TYPE_XML 3 +#define CUSTOM_EVENT_FORMAT_TYPE_IBM 4 + +/* The functions to the tracer management code */ +int register_tracer + 
(tracer_call /* The tracer function */); +int unregister_tracer + (tracer_call /* The tracer function */); +int trace_set_config + (tracer_call /* The tracer function */, + int /* Use depth to fetch eip */, + int /* Use bounds to fetch eip */, + int /* Detph to fetch eip */, + void* /* Lower bound eip address */, + void* /* Upper bound eip address */); +int trace_register_callback + (tracer_call /* The callback to add */, + uint8_t /* The event ID targeted */); +int trace_unregister_callback + (tracer_call /* The callback to remove */, + uint8_t /* The event ID targeted */); +int trace_get_config + (int* /* Use depth to fetch eip */, + int* /* Use bounds to fetch eip */, + int* /* Detph to fetch eip */, + void** /* Lower bound eip address */, + void** /* Upper bound eip address */); +int trace_create_event + (char* /* String describing event type */, + char* /* String to format standard event description */, + int /* Type of formatting used to log event data */, + char* /* Data specific to format */); +int trace_create_owned_event + (char* /* String describing event type */, + char* /* String to format standard event description */, + int /* Type of formatting used to log event data */, + char* /* Data specific to format */, + pid_t /* PID of event's owner */); +void trace_destroy_event + (int /* The event ID given by trace_create_event() */); +void trace_destroy_owners_events + (pid_t /* The PID of the process' who's events are to be deleted */); +void trace_reregister_custom_events + (void); +int trace_std_formatted_event + (int /* The event ID given by trace_create_event() */, + ... 
/* The parameters to be printed out in the event string */); +int trace_raw_event + (int /* The event ID given by trace_create_event() */, + int /* The size of the raw data */, + void* /* Pointer to the raw event data */); +int trace_event + (uint8_t /* Event ID (as defined in this header file) */, + void* /* Structure describing the event */); + +/* Generic macros */ +#define TRACE_EVENT(ID, DATA) trace_event(ID, DATA) + +/* Traced events */ +#define TRACE_EV_START 0 /* This is to mark the trace's start */ +#define TRACE_EV_SYSCALL_ENTRY 1 /* Entry in a given system call */ +#define TRACE_EV_SYSCALL_EXIT 2 /* Exit from a given system call */ +#define TRACE_EV_TRAP_ENTRY 3 /* Entry in a trap */ +#define TRACE_EV_TRAP_EXIT 4 /* Exit from a trap */ +#define TRACE_EV_IRQ_ENTRY 5 /* Entry in an irq */ +#define TRACE_EV_IRQ_EXIT 6 /* Exit from an irq */ +#define TRACE_EV_SCHEDCHANGE 7 /* Scheduling change */ +#define TRACE_EV_KERNEL_TIMER 8 /* The kernel timer routine has been called */ +#define TRACE_EV_SOFT_IRQ 9 /* Hit key part of soft-irq management */ +#define TRACE_EV_PROCESS 10 /* Hit key part of process management */ +#define TRACE_EV_FILE_SYSTEM 11 /* Hit key part of file system */ +#define TRACE_EV_TIMER 12 /* Hit key part of timer management */ +#define TRACE_EV_MEMORY 13 /* Hit key part of memory management */ +#define TRACE_EV_SOCKET 14 /* Hit key part of socket communication */ +#define TRACE_EV_IPC 15 /* Hit key part of System V IPC */ +#define TRACE_EV_NETWORK 16 /* Hit key part of network communication */ + +#define TRACE_EV_BUFFER_START 17 /* Mark the begining of a trace buffer */ +#define TRACE_EV_BUFFER_END 18 /* Mark the ending of a trace buffer */ +#define TRACE_EV_NEW_EVENT 19 /* New event type */ +#define TRACE_EV_CUSTOM 20 /* Custom event */ + +#define TRACE_EV_CHANGE_MASK 21 /* Change in event mask */ + +/* Number of traced events */ +#define TRACE_EV_MAX TRACE_EV_CHANGE_MASK + +/* Structures and macros for events */ +/* TRACE_SYSCALL_ENTRY */ 
+typedef struct _trace_syscall_entry
+{
+  uint8_t   syscall_id;   /* Syscall entry number in entry.S */
+  uint32_t  address;      /* Address from which call was made */
+} LTT_PACKED_STRUCT trace_syscall_entry;
+
+/* TRACE_TRAP_ENTRY */
+#ifndef __s390__
+typedef struct _trace_trap_entry
+{
+  uint16_t  trap_id;     /* Trap number */
+  uint32_t  address;     /* Address where trap occurred */
+} LTT_PACKED_STRUCT trace_trap_entry;
+#else
+typedef struct _trace_trap_entry
+{
+  uint64_t  trap_id;     /* Trap number */
+  uint32_t  address;     /* Address where trap occurred */
+} LTT_PACKED_STRUCT trace_trap_entry;
+#endif
+/* Log a trap entry. Expands to a single statement: the caller supplies
+   the terminating ';', so the macro is safe inside if/else. */
+#define TRACE_TRAP_ENTRY(ID, EIP) \
+           do \
+           {\
+           trace_trap_entry trap_event;\
+           trap_event.trap_id = (ID);\
+           trap_event.address = (EIP);\
+           trace_event(TRACE_EV_TRAP_ENTRY, &trap_event);\
+           } while(0)
+
+/* TRACE_TRAP_EXIT */
+#define TRACE_TRAP_EXIT()  trace_event(TRACE_EV_TRAP_EXIT, NULL)
+
+/* TRACE_IRQ_ENTRY */
+typedef struct _trace_irq_entry
+{
+  uint8_t  irq_id;      /* IRQ number */
+  uint8_t  kernel;      /* Are we executing kernel code */
+} LTT_PACKED_STRUCT trace_irq_entry;
+/* Log an irq entry. Single statement, caller supplies the ';'. */
+#define TRACE_IRQ_ENTRY(ID, KERNEL) \
+           do \
+           {\
+           trace_irq_entry irq_entry;\
+           irq_entry.irq_id = (ID);\
+           irq_entry.kernel = (KERNEL);\
+           trace_event(TRACE_EV_IRQ_ENTRY, &irq_entry);\
+           } while(0)
+
+/* TRACE_IRQ_EXIT */
+#define TRACE_IRQ_EXIT()  trace_event(TRACE_EV_IRQ_EXIT, NULL)
+
+/* TRACE_SCHEDCHANGE */
+typedef struct _trace_schedchange
+{
+  uint32_t  out;         /* Outgoing process */
+  uint32_t  in;          /* Incoming process */
+  uint32_t  out_state;   /* Outgoing process' state */
+} LTT_PACKED_STRUCT trace_schedchange;
+/* Log a scheduling change. OUT is dereferenced for ->pid and ->state,
+   IN is stored as given after a cast to uint32_t. OUT is evaluated
+   twice. Single statement, caller supplies the ';'. */
+#define TRACE_SCHEDCHANGE(OUT, IN) \
+           do \
+           {\
+           trace_schedchange sched_event;\
+           sched_event.out       = (OUT)->pid;\
+           sched_event.in        = (uint32_t) (IN);\
+           sched_event.out_state = (OUT)->state; \
+           trace_event(TRACE_EV_SCHEDCHANGE, &sched_event);\
+           } while(0)
+
+/* TRACE_SOFT_IRQ */
+#define TRACE_EV_SOFT_IRQ_BOTTOM_HALF        1  /* Conventional bottom-half */
+#define TRACE_EV_SOFT_IRQ_SOFT_IRQ           2  /* Real soft-irq 
*/ +#define TRACE_EV_SOFT_IRQ_TASKLET_ACTION 3 /* Tasklet action */ +#define TRACE_EV_SOFT_IRQ_TASKLET_HI_ACTION 4 /* Tasklet hi-action */ +typedef struct _trace_soft_irq +{ + uint8_t event_sub_id; /* Soft-irq event Id */ + uint32_t event_data; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_soft_irq; +/* Log a soft-irq event: ID is one of TRACE_EV_SOFT_IRQ_*, DATA its associated data. + Macro arguments are parenthesized so that expression arguments expand as intended. */ +#define TRACE_SOFT_IRQ(ID, DATA) \ + do \ + {\ + trace_soft_irq soft_irq_event;\ + soft_irq_event.event_sub_id = (ID);\ + soft_irq_event.event_data = (DATA);\ + trace_event(TRACE_EV_SOFT_IRQ, &soft_irq_event);\ + } while(0); + +/* TRACE_PROCESS */ +#define TRACE_EV_PROCESS_KTHREAD 1 /* Creation of a kernel thread */ +#define TRACE_EV_PROCESS_FORK 2 /* A fork or clone occurred */ +#define TRACE_EV_PROCESS_EXIT 3 /* An exit occurred */ +#define TRACE_EV_PROCESS_WAIT 4 /* A wait occurred */ +#define TRACE_EV_PROCESS_SIGNAL 5 /* A signal has been sent */ +#define TRACE_EV_PROCESS_WAKEUP 6 /* Wake up a process */ +typedef struct _trace_process +{ + uint8_t event_sub_id; /* Process event ID */ + uint32_t event_data1; /* Data associated with event */ + uint32_t event_data2; +} LTT_PACKED_STRUCT trace_process; +/* Log a process event: ID is one of TRACE_EV_PROCESS_*, DATA1/DATA2 its associated data */ +#define TRACE_PROCESS(ID, DATA1, DATA2) \ + do \ + {\ + trace_process proc_event;\ + proc_event.event_sub_id = (ID);\ + proc_event.event_data1 = (DATA1);\ + proc_event.event_data2 = (DATA2);\ + trace_event(TRACE_EV_PROCESS, &proc_event);\ + } while(0); + +/* TRACE_FILE_SYSTEM */ +#define TRACE_EV_FILE_SYSTEM_BUF_WAIT_START 1 /* Starting to wait for a data buffer */ +#define TRACE_EV_FILE_SYSTEM_BUF_WAIT_END 2 /* End to wait for a data buffer */ +#define TRACE_EV_FILE_SYSTEM_EXEC 3 /* An exec occurred */ +#define TRACE_EV_FILE_SYSTEM_OPEN 4 /* An open occurred */ +#define TRACE_EV_FILE_SYSTEM_CLOSE 5 /* A close occurred */ +#define TRACE_EV_FILE_SYSTEM_READ 6 /* A read occurred */ +#define TRACE_EV_FILE_SYSTEM_WRITE 7 /* A write occurred */ +#define TRACE_EV_FILE_SYSTEM_SEEK 8 /* A seek occurred */ +#define TRACE_EV_FILE_SYSTEM_IOCTL 9 /* An ioctl occurred */ +#define 
TRACE_EV_FILE_SYSTEM_SELECT 10 /* A select occurred */ +#define TRACE_EV_FILE_SYSTEM_POLL 11 /* A poll occurred */ +typedef struct _trace_file_system +{ + uint8_t event_sub_id; /* File system event ID */ + uint32_t event_data1; /* Event data */ + uint32_t event_data2; /* Event data 2 */ + char* file_name; /* Name of file operated on */ +} LTT_PACKED_STRUCT trace_file_system; +/* Log a file system event: ID is one of TRACE_EV_FILE_SYSTEM_*, FILE_NAME is stored as a pointer (not copied here). + Macro arguments are parenthesized; in particular the (char*) cast now applies to the whole FILE_NAME expression. */ +#define TRACE_FILE_SYSTEM(ID, DATA1, DATA2, FILE_NAME) \ + do \ + {\ + trace_file_system fs_event;\ + fs_event.event_sub_id = (ID);\ + fs_event.event_data1 = (DATA1);\ + fs_event.event_data2 = (DATA2);\ + fs_event.file_name = (char*)(FILE_NAME);\ + trace_event(TRACE_EV_FILE_SYSTEM, &fs_event);\ + } while(0); + +/* TRACE_TIMER */ +#define TRACE_EV_TIMER_EXPIRED 1 /* Timer expired */ +#define TRACE_EV_TIMER_SETITIMER 2 /* Setting itimer occurred */ +#define TRACE_EV_TIMER_SETTIMEOUT 3 /* Setting sched timeout occurred */ +typedef struct _trace_timer +{ + uint8_t event_sub_id; /* Timer event ID */ + uint8_t event_sdata; /* Short data */ + uint32_t event_data1; /* Data associated with event */ + uint32_t event_data2; +} LTT_PACKED_STRUCT trace_timer; +/* Log a timer event: ID is one of TRACE_EV_TIMER_*, SDATA is the short (8-bit) datum */ +#define TRACE_TIMER(ID, SDATA, DATA1, DATA2) \ + do \ + {\ + trace_timer timer_event;\ + timer_event.event_sub_id = (ID);\ + timer_event.event_sdata = (SDATA);\ + timer_event.event_data1 = (DATA1);\ + timer_event.event_data2 = (DATA2);\ + trace_event(TRACE_EV_TIMER, &timer_event);\ + } while(0); + +/* TRACE_MEMORY */ +#define TRACE_EV_MEMORY_PAGE_ALLOC 1 /* Allocating pages */ +#define TRACE_EV_MEMORY_PAGE_FREE 2 /* Freeing pages */ +#define TRACE_EV_MEMORY_SWAP_IN 3 /* Swapping pages in */ +#define TRACE_EV_MEMORY_SWAP_OUT 4 /* Swapping pages out */ +#define TRACE_EV_MEMORY_PAGE_WAIT_START 5 /* Start to wait for page */ +#define TRACE_EV_MEMORY_PAGE_WAIT_END 6 /* End to wait for page */ +typedef struct _trace_memory +{ + uint8_t event_sub_id; /* Memory event ID */ + unsigned long event_data; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_memory; +#define 
TRACE_MEMORY(ID, DATA) \ + do \ + {\ + trace_memory memory_event;\ + memory_event.event_sub_id = (ID);\ + memory_event.event_data = (DATA);\ + trace_event(TRACE_EV_MEMORY, &memory_event);\ + } while(0); + +/* TRACE_SOCKET */ +#define TRACE_EV_SOCKET_CALL 1 /* A socket call occurred */ +#define TRACE_EV_SOCKET_CREATE 2 /* A socket has been created */ +#define TRACE_EV_SOCKET_SEND 3 /* Data was sent to a socket */ +#define TRACE_EV_SOCKET_RECEIVE 4 /* Data was read from a socket */ +typedef struct _trace_socket +{ + uint8_t event_sub_id; /* Socket event ID */ + uint32_t event_data1; /* Data associated with event */ + uint32_t event_data2; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_socket; +/* Log a socket event: ID is one of TRACE_EV_SOCKET_*, DATA1/DATA2 its associated data. + Macro arguments are parenthesized so that expression arguments expand as intended. */ +#define TRACE_SOCKET(ID, DATA1, DATA2) \ + do \ + {\ + trace_socket socket_event;\ + socket_event.event_sub_id = (ID);\ + socket_event.event_data1 = (DATA1);\ + socket_event.event_data2 = (DATA2);\ + trace_event(TRACE_EV_SOCKET, &socket_event);\ + } while(0); + +/* TRACE_IPC */ +#define TRACE_EV_IPC_CALL 1 /* A System V IPC call occurred */ +#define TRACE_EV_IPC_MSG_CREATE 2 /* A message queue has been created */ +#define TRACE_EV_IPC_SEM_CREATE 3 /* A semaphore was created */ +#define TRACE_EV_IPC_SHM_CREATE 4 /* A shared memory segment has been created */ +typedef struct _trace_ipc +{ + uint8_t event_sub_id; /* IPC event ID */ + uint32_t event_data1; /* Data associated with event */ + uint32_t event_data2; /* Data associated with event */ +} LTT_PACKED_STRUCT trace_ipc; +/* Log an IPC event: ID is one of TRACE_EV_IPC_*, DATA1/DATA2 its associated data */ +#define TRACE_IPC(ID, DATA1, DATA2) \ + do \ + {\ + trace_ipc ipc_event;\ + ipc_event.event_sub_id = (ID);\ + ipc_event.event_data1 = (DATA1);\ + ipc_event.event_data2 = (DATA2);\ + trace_event(TRACE_EV_IPC, &ipc_event);\ + } while(0); + +/* TRACE_NETWORK */ +#define TRACE_EV_NETWORK_PACKET_IN 1 /* A packet came in */ +#define TRACE_EV_NETWORK_PACKET_OUT 2 /* A packet was sent */ +typedef struct _trace_network +{ + uint8_t event_sub_id; /* Network event ID */ + uint32_t event_data; /* Event data */ +} 
LTT_PACKED_STRUCT trace_network; +/* Log a network event: ID is one of TRACE_EV_NETWORK_*, DATA its associated data. + Macro arguments are parenthesized so that expression arguments expand as intended. */ +#define TRACE_NETWORK(ID, DATA) \ + do \ + {\ + trace_network net_event;\ + net_event.event_sub_id = (ID);\ + net_event.event_data = (DATA);\ + trace_event(TRACE_EV_NETWORK, &net_event);\ + } while(0); + +/* Custom declared events */ +/* ***WARNING*** These structures should never be used as is, use the provided custom event creation + and logging functions. */ +typedef struct _trace_new_event +{ + /* Basics */ + uint32_t id; /* Custom event ID */ + char type[CUSTOM_EVENT_TYPE_STR_LEN]; /* Event type description */ + char desc[CUSTOM_EVENT_DESC_STR_LEN]; /* Detailed event description */ + + /* Custom formatting */ + uint32_t format_type; /* Type of formatting */ + char form[CUSTOM_EVENT_FORM_STR_LEN]; /* Data specific to format */ +} LTT_PACKED_STRUCT trace_new_event; +typedef struct _trace_custom +{ + uint32_t id; /* Event ID */ + uint32_t data_size; /* Size of data recorded by event */ + void* data; /* Data recorded by event */ +} LTT_PACKED_STRUCT trace_custom; + +/* TRACE_CHANGE_MASK */ +typedef uint64_t trace_event_mask; /* The event mask type */ +typedef struct _trace_change_mask +{ + trace_event_mask mask; /* Event mask */ +} LTT_PACKED_STRUCT trace_change_mask; + +#else /* Kernel is configured without tracing */ +#define TRACE_EVENT(ID, DATA) +#define TRACE_TRAP_ENTRY(ID, EIP) +#define TRACE_TRAP_EXIT() +#define TRACE_IRQ_ENTRY(ID, KERNEL) +#define TRACE_IRQ_EXIT() +#define TRACE_SCHEDCHANGE(OUT, IN) +#define TRACE_SOFT_IRQ(ID, DATA) +#define TRACE_PROCESS(ID, DATA1, DATA2) +#define TRACE_FILE_SYSTEM(ID, DATA1, DATA2, FILE_NAME) +#define TRACE_TIMER(ID, SDATA, DATA1, DATA2) +#define TRACE_MEMORY(ID, DATA) +#define TRACE_SOCKET(ID, DATA1, DATA2) +#define TRACE_IPC(ID, DATA1, DATA2) +#define TRACE_NETWORK(ID, DATA) +#endif /* defined(CONFIG_TRACE) || defined(CONFIG_TRACE_MODULE) */ + +#endif /* _LINUX_TRACE_H */ diff -urNp linux-2.4.7.SuSE-orig/ipc/msg.c linux-2.4.7.SuSE/ipc/msg.c --- linux-2.4.7.SuSE-orig/ipc/msg.c Mon Feb 19 
13:18:18 2001 +++ linux-2.4.7.SuSE/ipc/msg.c Mon Jun 24 11:52:48 2002 @@ -25,6 +25,8 @@ #include #include "util.h" +#include + /* sysctl: */ int msg_ctlmax = MSGMAX; int msg_ctlmnb = MSGMNB; @@ -326,6 +328,7 @@ asmlinkage long sys_msgget (key_t key, i msg_unlock(id); } up(&msg_ids.sem); + TRACE_IPC(TRACE_EV_IPC_MSG_CREATE, ret, msgflg); return ret; } diff -urNp linux-2.4.7.SuSE-orig/ipc/sem.c linux-2.4.7.SuSE/ipc/sem.c --- linux-2.4.7.SuSE-orig/ipc/sem.c Mon Feb 19 13:18:18 2001 +++ linux-2.4.7.SuSE/ipc/sem.c Mon Jun 24 11:52:48 2002 @@ -63,6 +63,7 @@ #include #include "util.h" +#include #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id)) #define sem_unlock(id) ipc_unlock(&sem_ids,id) @@ -179,6 +180,7 @@ asmlinkage long sys_semget (key_t key, i } up(&sem_ids.sem); + TRACE_IPC(TRACE_EV_IPC_SEM_CREATE, err, semflg); return err; } diff -urNp linux-2.4.7.SuSE-orig/ipc/shm.c linux-2.4.7.SuSE/ipc/shm.c --- linux-2.4.7.SuSE-orig/ipc/shm.c Sat May 19 20:47:55 2001 +++ linux-2.4.7.SuSE/ipc/shm.c Mon Jun 24 11:52:48 2002 @@ -26,6 +26,8 @@ #include "util.h" +#include + struct shmid_kernel /* private to the kernel */ { struct kern_ipc_perm shm_perm; @@ -252,6 +254,7 @@ asmlinkage long sys_shmget (key_t key, s shm_unlock(id); } up(&shm_ids.sem); + TRACE_IPC(TRACE_EV_IPC_SHM_CREATE, err, shmflg); return err; } diff -urNp linux-2.4.7.SuSE-orig/kernel/Makefile linux-2.4.7.SuSE/kernel/Makefile --- linux-2.4.7.SuSE-orig/kernel/Makefile Fri Dec 29 17:07:24 2000 +++ linux-2.4.7.SuSE/kernel/Makefile Mon Jun 24 11:52:48 2002 @@ -9,7 +9,7 @@ O_TARGET := kernel.o -export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o +export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o trace.o obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ @@ -19,6 +19,10 @@ obj-y = sched.o dma.o fork.o exec_do obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o obj-$(CONFIG_PM) += pm.o + +ifdef CONFIG_TRACE 
+obj-y += trace.o +endif ifneq ($(CONFIG_IA64),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff -urNp linux-2.4.7.SuSE-orig/kernel/exit.c linux-2.4.7.SuSE/kernel/exit.c --- linux-2.4.7.SuSE-orig/kernel/exit.c Tue Oct 30 16:43:40 2001 +++ linux-2.4.7.SuSE/kernel/exit.c Mon Jun 24 11:52:48 2002 @@ -14,6 +14,8 @@ #include #endif +#include + #include #include #include @@ -452,6 +454,7 @@ fake_volatile: if (tng_exitfunc != NULL) { (*tng_exitfunc)((int) code) ; } + TRACE_PROCESS(TRACE_EV_PROCESS_EXIT, 0, 0); lock_kernel(); sem_exit(); __exit_files(tsk); @@ -507,6 +510,8 @@ asmlinkage long sys_wait4(pid_t pid,unsi if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; + + TRACE_PROCESS(TRACE_EV_PROCESS_WAIT, pid, 0); add_wait_queue(¤t->wait_chldexit,&wait); repeat: diff -urNp linux-2.4.7.SuSE-orig/kernel/fork.c linux-2.4.7.SuSE/kernel/fork.c --- linux-2.4.7.SuSE-orig/kernel/fork.c Tue Oct 30 16:43:40 2001 +++ linux-2.4.7.SuSE/kernel/fork.c Mon Jun 24 11:52:48 2002 @@ -20,6 +20,8 @@ #include #include +#include + #include #include #include @@ -718,6 +720,9 @@ int do_fork(unsigned long clone_flags, u if (p->ptrace & PT_PTRACED) send_sig(SIGSTOP, p, 1); + + /* Trace the event */ + TRACE_PROCESS(TRACE_EV_PROCESS_FORK, retval, 0); if (tng_forkfunc != NULL) { (*tng_forkfunc)((struct task_struct *) p) ; diff -urNp linux-2.4.7.SuSE-orig/kernel/itimer.c linux-2.4.7.SuSE/kernel/itimer.c --- linux-2.4.7.SuSE-orig/kernel/itimer.c Thu Jun 29 13:07:36 2000 +++ linux-2.4.7.SuSE/kernel/itimer.c Mon Jun 24 11:52:48 2002 @@ -10,6 +10,8 @@ #include #include +#include + #include /* @@ -95,6 +97,8 @@ void it_real_fn(unsigned long __data) struct task_struct * p = (struct task_struct *) __data; unsigned long interval; + TRACE_TIMER(TRACE_EV_TIMER_EXPIRED, 0, 0, 0); + send_sig(SIGALRM, p, 1); interval = p->it_real_incr; if (interval) { @@ -114,6 +118,7 @@ int do_setitimer(int which, struct itime j = tvtojiffies(&value->it_value); if (ovalue && (k = 
do_getitimer(which, ovalue)) < 0) return k; + TRACE_TIMER(TRACE_EV_TIMER_SETITIMER, which, i, j); switch (which) { case ITIMER_REAL: del_timer_sync(¤t->real_timer); diff -urNp linux-2.4.7.SuSE-orig/kernel/sched.c linux-2.4.7.SuSE/kernel/sched.c --- linux-2.4.7.SuSE-orig/kernel/sched.c Tue Oct 30 16:43:44 2001 +++ linux-2.4.7.SuSE/kernel/sched.c Mon Jun 24 11:52:48 2002 @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -334,6 +336,8 @@ static inline int try_to_wake_up(struct unsigned long flags; int success = 0; + TRACE_PROCESS(TRACE_EV_PROCESS_WAKEUP, p->pid, p->state); + /* * We want the common case fall through straight, thus the goto. */ @@ -359,6 +363,7 @@ static void process_timeout(unsigned lon { struct task_struct * p = (struct task_struct *) __data; + TRACE_TIMER(TRACE_EV_TIMER_EXPIRED, 0, 0, 0); wake_up_process(p); } @@ -423,6 +428,8 @@ signed long schedule_timeout(signed long } } + TRACE_TIMER(TRACE_EV_TIMER_SETTIMEOUT, 0, timeout, 0); + expire = timeout + jiffies; init_timer(&timer); @@ -660,6 +667,8 @@ still_running_back: mmdrop(oldmm); } } + + TRACE_SCHEDCHANGE(prev, next); /* * This just switches the register state and the diff -urNp linux-2.4.7.SuSE-orig/kernel/signal.c linux-2.4.7.SuSE/kernel/signal.c --- linux-2.4.7.SuSE-orig/kernel/signal.c Wed Jan 3 23:45:26 2001 +++ linux-2.4.7.SuSE/kernel/signal.c Mon Jun 24 11:52:48 2002 @@ -14,6 +14,8 @@ #include #include +#include + #include /* @@ -540,6 +542,8 @@ printk("SIG queue (%s:%d): %d ", t->comm the signal. 
*/ if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig)) goto out; + + TRACE_PROCESS(TRACE_EV_PROCESS_SIGNAL, sig, t->pid); ret = deliver_signal(sig, info, t); out: diff -urNp linux-2.4.7.SuSE-orig/kernel/softirq.c linux-2.4.7.SuSE/kernel/softirq.c --- linux-2.4.7.SuSE-orig/kernel/softirq.c Tue Oct 30 16:43:30 2001 +++ linux-2.4.7.SuSE/kernel/softirq.c Mon Jun 24 11:52:48 2002 @@ -17,6 +17,8 @@ #include #include +#include + /* - No shared variables, all the data are CPU local. - If a softirq needs serialization, let it serialize itself @@ -86,8 +88,10 @@ restart: h = softirq_vec; do { - if (pending & 1) + if (pending & 1) { + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_SOFT_IRQ, (h - softirq_vec)); h->action(h); + } h++; pending >>= 1; } while (pending); @@ -198,6 +202,7 @@ static void tasklet_action(struct softir if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_TASKLET_ACTION, (unsigned long) (t->func)); t->func(t->data); tasklet_unlock(t); continue; @@ -236,6 +241,7 @@ static void tasklet_hi_action(struct sof if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_TASKLET_HI_ACTION, (unsigned long) (t->func)); t->func(t->data); tasklet_unlock(t); continue; @@ -305,8 +311,10 @@ static void bh_action(unsigned long nr) if (!hardirq_trylock(cpu)) goto resched_unlock; - if (bh_base[nr]) + if (bh_base[nr]){ + TRACE_SOFT_IRQ(TRACE_EV_SOFT_IRQ_BOTTOM_HALF, (nr)); bh_base[nr](); + } hardirq_endlock(cpu); spin_unlock(&global_bh_lock); diff -urNp linux-2.4.7.SuSE-orig/kernel/timer.c linux-2.4.7.SuSE/kernel/timer.c --- linux-2.4.7.SuSE-orig/kernel/timer.c Tue Oct 30 16:43:36 2001 +++ linux-2.4.7.SuSE/kernel/timer.c Mon Jun 24 11:52:48 2002 @@ -23,6 +23,8 @@ #include #include +#include + #include /* @@ -667,6 +669,7 @@ static inline void update_times(void) void timer_bh(void) { + TRACE_EVENT(TRACE_EV_KERNEL_TIMER, NULL); 
update_times(); run_timer_list(); } diff -urNp linux-2.4.7.SuSE-orig/kernel/trace.c linux-2.4.7.SuSE/kernel/trace.c --- linux-2.4.7.SuSE-orig/kernel/trace.c Wed Dec 31 19:00:00 1969 +++ linux-2.4.7.SuSE/kernel/trace.c Mon Jun 24 11:52:48 2002 @@ -0,0 +1,692 @@ +/* + * linux/kernel/trace.c + * + * (C) Copyright 1999, 2000, 2001, 2002 - Karim Yaghmour (karym@opersys.com) + * + * This code is distributed under the GPL license + * + * Tracing management: tracer registration, per-event callbacks and + * custom event creation/logging. + * + */ + +#include /* For __init. NOTE(review): the header names after each #include are missing in this copy of the patch; restore them from the original before applying */ +#include /* Tracing definitions */ +#include /* Miscellaneous error codes */ +#include /* NULL */ +#include /* kmalloc() */ +#include /* EXPORT_SYMBOL */ +#include /* pid_t */ + +/* Registration state */ +static int tracer_registered = 0; /* Is there a tracer registered (1) or not (0) */ +struct tracer * tracer = NULL; /* The registered tracer, NULL when none */ + +/* Registration lock, taken around updates of tracer_registered and tracer */ +rwlock_t tracer_register_lock = RW_LOCK_UNLOCKED; + +/* Trace callback table entry (node of a singly-linked, NULL-terminated list) */ +struct trace_callback_table_entry +{ + tracer_call callback; /* The callback function */ + + struct trace_callback_table_entry* next; /* Next entry */ +}; + +/* Trace callback table: one list head per native event, indexed by (event ID - 1); only the head's next field is followed */ +struct trace_callback_table_entry trace_callback_table[TRACE_EV_MAX]; + +/* Custom event description (node of the circular list of custom events) */ +struct custom_event_desc +{ + /* The event itself */ + trace_new_event event; + + /* PID of event owner, if any */ + pid_t owner_pid; + + /* List links */ + struct custom_event_desc* next; + struct custom_event_desc* prev; +}; + +/* Next event ID to be given to a custom event (set to TRACE_EV_MAX + 1 by trace_init()) */ +int next_event_id; + +/* Circular doubly-linked list of custom events; custom_events points to the sentinel custom_events_head */ +struct custom_event_desc custom_events_head; +struct custom_event_desc* custom_events; + +/* Circular list lock */ +rwlock_t custom_list_lock = RW_LOCK_UNLOCKED; + +/**************************************************** + * Register the tracer to the kernel + * Parameters : + * pm_trace_function, the function called for every event + * Return values : + * 0, all is OK + * -EBUSY, there already is a registered tracer + * -ENOMEM, couldn't allocate memory + ****************************************************/ +int 
register_tracer(tracer_call pm_trace_function) +{ + unsigned long l_flags; /* Flags for irqsave */ + struct tracer* p_tracer; /* Tracer structure being set up */ + + /* Is there a tracer already registered (unlocked early-out, re-checked under the lock below) */ + if(tracer_registered == 1) + return -EBUSY; + + /* Allocate memory for the tracer (into a local, so that a failed or losing registration never touches the global) */ + if((p_tracer = (struct tracer *) kmalloc(sizeof(struct tracer), GFP_ATOMIC)) == NULL) + /* We couldn't allocate any memory */ + return -ENOMEM; + + /* Fully initialize the tracer before publishing it, so that nobody can observe a half-initialized structure */ + p_tracer->trace = pm_trace_function; + p_tracer->fetch_syscall_eip_use_bounds = 0; + p_tracer->fetch_syscall_eip_use_depth = 0; + + /* Lock registration variables */ + write_lock_irqsave(&tracer_register_lock, l_flags); + + /* Did somebody else register a tracer in the meantime */ + if(tracer_registered == 1) + { + /* Unlock registration variables */ + write_unlock_irqrestore(&tracer_register_lock, l_flags); + + /* Drop our unused structure instead of overwriting (and leaking) the registered one */ + kfree(p_tracer); + return -EBUSY; + } + + /* Publish the tracer: there is now a tracer registered */ + tracer = p_tracer; + tracer_registered = 1; + + /* Unlock registration variables */ + write_unlock_irqrestore(&tracer_register_lock, l_flags); + + /* Tell the caller that everything went fine */ + return 0; +} + +/*************************************************** + * Unregister the currently registered tracer + * Parameters : + * pm_trace_function, the function passed to register_tracer() + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + * -ENXIO, unregistering wrong tracer + ***************************************************/ +int unregister_tracer(tracer_call pm_trace_function) +{ + unsigned long l_flags; /* Flags for irqsave */ + + /* Lock registration variables */ + write_lock_irqsave(&tracer_register_lock, l_flags); + + /* Is there a tracer registered (checked under the lock so that tracer cannot be freed between this check and its use) */ + if(tracer_registered == 0) + { + /* Unlock registration variables */ + write_unlock_irqrestore(&tracer_register_lock, l_flags); + + /* Nothing to unregister */ + return -ENOMEDIUM; + } + + /* Is it the tracer that was registered */ + if(tracer->trace != pm_trace_function) + { + /* Unlock registration variables */ + write_unlock_irqrestore(&tracer_register_lock, l_flags); + + /* We're done here */ + return -ENXIO; + } + + /* There isn't any tracer in here */ + tracer_registered = 0; + + /* Free the memory used by the tracing structure */ + kfree(tracer); + tracer = NULL; + + /* Unlock registration variables */ + 
write_unlock_irqrestore(&tracer_register_lock, l_flags); + + /* Tell the caller that everything went OK */ + return 0; +} + +/******************************************************* + * Set the tracing configuration + * Parameters : + * pm_trace_function, the trace function. + * pm_fetch_syscall_use_depth, Use depth to fetch eip + * pm_fetch_syscall_use_bounds, Use bounds to fetch eip + * pm_syscall_eip_depth, Depth to fetch eip + * pm_syscall_lower_bound, Lower bound eip address + * pm_syscall_upper_bound, Upper bound eip address + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + * -ENXIO, wrong tracer + * -EINVAL, invalid configuration (depth and bounds both + * requested, lower bound above upper bound, or + * negative depth) + *******************************************************/ +int trace_set_config(tracer_call pm_trace_function, + int pm_fetch_syscall_use_depth, + int pm_fetch_syscall_use_bounds, + int pm_syscall_eip_depth, + void* pm_syscall_lower_bound, + void* pm_syscall_upper_bound) +{ + /* Is there a tracer already registered. NOTE(review): checked without taking tracer_register_lock */ + if(tracer_registered == 0) + return -ENOMEDIUM; + + /* Is it the tracer that was registered (only the owner of the registered trace function may reconfigure) */ + if(tracer->trace != pm_trace_function) + return -ENXIO; + + /* Is this a valid configuration: depth and bounds are mutually exclusive, bounds must be ordered, depth must not be negative */ + if((pm_fetch_syscall_use_depth && pm_fetch_syscall_use_bounds) + ||(pm_syscall_lower_bound > pm_syscall_upper_bound) + ||(pm_syscall_eip_depth < 0)) + return -EINVAL; + + /* Set the configuration */ + tracer->fetch_syscall_eip_use_depth = pm_fetch_syscall_use_depth; + tracer->fetch_syscall_eip_use_bounds = pm_fetch_syscall_use_bounds; + tracer->syscall_eip_depth = pm_syscall_eip_depth; + tracer->syscall_lower_eip_bound = pm_syscall_lower_bound; + tracer->syscall_upper_eip_bound = pm_syscall_upper_bound; + + /* Tell the caller that everything was OK */ + return 0; +} + +/******************************************************* + * Get the tracing configuration + * Parameters (all are output pointers and are written + * unconditionally once a tracer is found) : + * pm_fetch_syscall_use_depth, Use depth to fetch eip + * pm_fetch_syscall_use_bounds, Use bounds to fetch eip + * 
pm_syscall_eip_depth, Depth to fetch eip + * pm_syscall_lower_bound, Lower bound eip address + * pm_syscall_upper_bound, Upper bound eip address + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + *******************************************************/ +int trace_get_config(int* pm_fetch_syscall_use_depth, + int* pm_fetch_syscall_use_bounds, + int* pm_syscall_eip_depth, + void** pm_syscall_lower_bound, + void** pm_syscall_upper_bound) +{ + /* Is there a tracer already registered. NOTE(review): checked without taking tracer_register_lock */ + if(tracer_registered == 0) + return -ENOMEDIUM; + + /* Get the configuration (copied out of the registered tracer structure) */ + *pm_fetch_syscall_use_depth = tracer->fetch_syscall_eip_use_depth; + *pm_fetch_syscall_use_bounds = tracer->fetch_syscall_eip_use_bounds; + *pm_syscall_eip_depth = tracer->syscall_eip_depth; + *pm_syscall_lower_bound = tracer->syscall_lower_eip_bound; + *pm_syscall_upper_bound = tracer->syscall_upper_eip_bound; + + /* Tell the caller that everything was OK */ + return 0; +} + +/******************************************************* + * Register a callback function to be called on occurence + * of given event + * Parameters : + * pm_trace_function, the callback function. + * pm_event_id, the event ID to be monitored. 
+ * Return values : + * 0, all is OK + * -ENOMEM, unable to allocate memory for callback + * -EINVAL, event ID is not a native event (1 .. TRACE_EV_MAX) + *******************************************************/ +int trace_register_callback(tracer_call pm_trace_function, + uint8_t pm_event_id) +{ + struct trace_callback_table_entry* p_tct_entry; /* Last entry of the event's callback list */ + struct trace_callback_table_entry* p_new_entry; /* Entry being added */ + + /* The table is indexed by (event ID - 1): reject IDs that would index outside of it */ + if((pm_event_id < 1) || (pm_event_id > TRACE_EV_MAX)) + return -EINVAL; + + /* Allocate a new callback */ + if((p_new_entry = kmalloc(sizeof(struct trace_callback_table_entry), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + /* Setup the new callback before it becomes reachable from the table, so that trace_event() never sees an entry with an unset callback */ + p_new_entry->callback = pm_trace_function; + p_new_entry->next = NULL; + + /* Search for the last entry of this event's callback list */ + for(p_tct_entry = &(trace_callback_table[pm_event_id - 1]); + p_tct_entry->next != NULL; + p_tct_entry = p_tct_entry->next); + + /* Link the new callback at the end of the list */ + p_tct_entry->next = p_new_entry; + + /* Tell the caller everything is ok */ + return 0; +} + +/******************************************************* + * UnRegister a callback function. + * Parameters : + * pm_trace_function, the callback function. + * pm_event_id, the event ID that had to be monitored. 
+ * Return values : + * 0, all is OK + * -ENOMEDIUM, no such callback registered + *******************************************************/ +int trace_unregister_callback(tracer_call pm_trace_function, + uint8_t pm_event_id) +{ + struct trace_callback_table_entry* p_tct_entry; /* Entry preceding the one being removed */ + struct trace_callback_table_entry* p_temp_entry; /* Entry being removed */ + + /* The table is indexed by (event ID - 1): an ID outside 1 .. TRACE_EV_MAX cannot have a callback */ + if((pm_event_id < 1) || (pm_event_id > TRACE_EV_MAX)) + return -ENOMEDIUM; + + /* Search for the callback in the callback table; the loop stops on the entry preceding the match, or on the last entry when there is no match */ + for(p_tct_entry = &(trace_callback_table[pm_event_id - 1]); + ((p_tct_entry->next != NULL) && (p_tct_entry->next->callback != pm_trace_function)); + p_tct_entry = p_tct_entry->next); + + /* Did we find anything (p_tct_entry itself is never NULL here, it is its successor that tells) */ + if(p_tct_entry->next == NULL) + return -ENOMEDIUM; + + /* Unlink the callback entry, then free it */ + p_temp_entry = p_tct_entry->next; + p_tct_entry->next = p_temp_entry->next; + kfree(p_temp_entry); + + /* Tell the caller everything is ok */ + return 0; +} + +/******************************************************* + * Create a new traceable event type + * Parameters : + * pm_event_type, string describing event type + * pm_event_desc, string used for standard formatting + * pm_format_type, type of formatting used to log event + * data + * pm_format_data, data specific to format + * pm_owner_pid, PID of event's owner (0 if none) + * Return values : + * New Event ID if all is OK + * -ENOMEM, Unable to allocate new event + *******************************************************/ +int _trace_create_event(char* pm_event_type, + char* pm_event_desc, + int pm_format_type, + char* pm_format_data, + pid_t pm_owner_pid) +{ + struct custom_event_desc* p_new_event; /* Newly created event */ + + /* Create event */ + if((p_new_event = (struct custom_event_desc*) kmalloc(sizeof(struct custom_event_desc), GFP_ATOMIC)) == NULL) + return -ENOMEM; + + /* Initialize event properties */ + p_new_event->event.type[0] = '\0'; + p_new_event->event.desc[0] = '\0'; + p_new_event->event.form[0] = '\0'; + + /* Set basic event 
properties */ + if(pm_event_type != NULL) + strncpy(p_new_event->event.type, pm_event_type, CUSTOM_EVENT_TYPE_STR_LEN); + if(pm_event_desc != NULL) + strncpy(p_new_event->event.desc, pm_event_desc, CUSTOM_EVENT_DESC_STR_LEN); + if(pm_format_data != NULL) + strncpy(p_new_event->event.form, pm_format_data, CUSTOM_EVENT_FORM_STR_LEN); + + /* Ensure that strings are bound (strncpy() does not terminate a source that fills the buffer) */ + p_new_event->event.type[CUSTOM_EVENT_TYPE_STR_LEN - 1] = '\0'; + p_new_event->event.desc[CUSTOM_EVENT_DESC_STR_LEN - 1] = '\0'; + p_new_event->event.form[CUSTOM_EVENT_FORM_STR_LEN - 1] = '\0'; + + /* Set format type */ + p_new_event->event.format_type = pm_format_type; + + /* Set event's owner */ + p_new_event->owner_pid = pm_owner_pid; + + /* Lock the event list; the ID counter is updated under the same lock so that two concurrent creations cannot get the same ID */ + write_lock(&custom_list_lock); + + /* Give the new event a unique event ID */ + p_new_event->event.id = next_event_id; + next_event_id++; + + /* Insert new event at the tail of the event list */ + p_new_event->next = custom_events; + p_new_event->prev = custom_events->prev; + custom_events->prev->next = p_new_event; + custom_events->prev = p_new_event; + write_unlock(&custom_list_lock); + + /* Log the event creation event */ + trace_event(TRACE_EV_NEW_EVENT, &(p_new_event->event)); + + /* Return new event ID */ + return p_new_event->event.id; +} +/* Create a new traceable event type without an owner (owner PID 0) */ +int trace_create_event(char* pm_event_type, + char* pm_event_desc, + int pm_format_type, + char* pm_format_data) +{ + return _trace_create_event(pm_event_type, pm_event_desc, pm_format_type, pm_format_data, 0); +} +/* Create a new traceable event type owned by pm_owner_pid */ +int trace_create_owned_event(char* pm_event_type, + char* pm_event_desc, + int pm_format_type, + char* pm_format_data, + pid_t pm_owner_pid) +{ + return _trace_create_event(pm_event_type, pm_event_desc, pm_format_type, pm_format_data, pm_owner_pid); +} + +/******************************************************* + * Destroy a created event type + * Parameters : + * pm_event_id, the Id returned by trace_create_event() + * Return values : + * NONE + *******************************************************/ +void 
trace_destroy_event(int pm_event_id) +{ + struct custom_event_desc* p_event_desc; /* Generic event description pointer */ + + /* Lock the table for writing */ + write_lock(&custom_list_lock); + + /* Go through the event description list until the ID matches or we are back at the list head */ + for(p_event_desc = custom_events->next; + p_event_desc != custom_events; + p_event_desc = p_event_desc->next) + if(p_event_desc->event.id == pm_event_id) + break; + + /* If we found something (an unknown ID is silently ignored) */ + if(p_event_desc != custom_events) + { + /* Remove the event from the list */ + p_event_desc->next->prev = p_event_desc->prev; + p_event_desc->prev->next = p_event_desc->next; + + /* Free the memory used by this event */ + kfree(p_event_desc); + } + + /* Unlock the table for writing */ + write_unlock(&custom_list_lock); +} + +/******************************************************* + * Destroy an owner's events + * Parameters : + * pm_owner_pid, the PID of the owner whose events are to + * be deleted. + * Return values : + * NONE + *******************************************************/ +void trace_destroy_owners_events(pid_t pm_owner_pid) +{ + struct custom_event_desc* p_temp_event; /* Successor of the event being examined */ + struct custom_event_desc* p_event_desc; /* Generic event description pointer */ + + /* Lock the table for writing */ + write_lock(&custom_list_lock); + + /* Start at the first event in the list */ + p_event_desc = custom_events->next; + + /* Go through the event description list */ + while(p_event_desc != custom_events) + { + /* Keep pointer to next event, since the current one may be freed below */ + p_temp_event = p_event_desc->next; + + /* Does this event belong to the same owner */ + if(p_event_desc->owner_pid == pm_owner_pid) + { + /* Remove the event from the list */ + p_event_desc->next->prev = p_event_desc->prev; + p_event_desc->prev->next = p_event_desc->next; + + /* Free the memory used by this event */ + kfree(p_event_desc); + } + + /* Go to next event */ + p_event_desc = p_temp_event; + } + + /* Unlock the table for writing */ + 
write_unlock(&custom_list_lock); +} + +/******************************************************* + * Relog the declarations of custom events. A custom + * event may have been created before the current trace + * was started, in which case its creation was not + * recorded in that trace. Relogging every declaration + * ensures that, if a custom event occurs during a trace, + * its definition is part of that trace. + * Parameters : + * NONE + * Return values : + * NONE + *******************************************************/ +void trace_reregister_custom_events(void) +{ + struct custom_event_desc* p_event_desc; /* Generic event description pointer */ + + /* Lock the table for reading */ + read_lock(&custom_list_lock); + + /* Go through the event description list */ + for(p_event_desc = custom_events->next; + p_event_desc != custom_events; + p_event_desc = p_event_desc->next) + /* Log the event creation event (the return value of trace_event() is ignored) */ + trace_event(TRACE_EV_NEW_EVENT, &(p_event_desc->event)); + + /* Unlock the table for reading */ + read_unlock(&custom_list_lock); +} + +/******************************************************* + * Trace a formatted event + * Parameters : + * pm_event_id, the event Id provided upon creation + * ..., printf-like data that will be used to fill the + * event string. The event's description string, + * given at creation, is used as the format. + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer or this + * event doesn't exist. + * -EBUSY, tracing hasn't started yet + *******************************************************/ +int trace_std_formatted_event(int pm_event_id, ...) 
+{ + int l_string_size; /* Size of the string output by vsprintf() */ + char l_string[CUSTOM_EVENT_FINAL_STR_LEN]; /* Final formatted string */ + va_list l_var_arg_list; /* Variable argument list */ + trace_custom l_custom; /* Custom event */ + struct custom_event_desc* p_event_desc; /* Generic event description pointer */ + + /* Lock the table for reading */ + read_lock(&custom_list_lock); + + /* Go through the event description list until the ID matches or we are back at the list head */ + for(p_event_desc = custom_events->next; + p_event_desc != custom_events; + p_event_desc = p_event_desc->next) + if(p_event_desc->event.id == pm_event_id) + break; + + /* If we haven't found anything */ + if(p_event_desc == custom_events) + { + /* Unlock the table for reading */ + read_unlock(&custom_list_lock); + + /* No such thing */ + return -ENOMEDIUM; + } + + /* Set custom event Id */ + l_custom.id = pm_event_id; + + /* Initialize variable argument list access */ + va_start(l_var_arg_list, pm_event_id); + + /* Print the description out to the temporary buffer, using the event's description as the format string. + NOTE(review): vsprintf() is unbounded, so output longer than CUSTOM_EVENT_FINAL_STR_LEN overruns l_string on the stack; + switch to a bounded formatter (vsnprintf) if this kernel provides one -- TODO confirm availability */ + l_string_size = vsprintf(l_string, p_event_desc->event.desc, l_var_arg_list); + + /* Unlock the table for reading (presumably held until here because the format string lives in the list entry) */ + read_unlock(&custom_list_lock); + + /* Facilitate return to caller */ + va_end(l_var_arg_list); + + /* Set the size of the event (formatted length plus the terminating NUL) */ + l_custom.data_size = (uint32_t) (l_string_size + 1); + + /* Set the pointer to the event data (a stack buffer, valid only for the duration of the trace_event() call below) */ + l_custom.data = l_string; + + /* Log the custom event */ + return trace_event(TRACE_EV_CUSTOM, &l_custom); +} + +/******************************************************* + * Trace a raw event + * Parameters : + * pm_event_id, the event Id provided upon creation + * pm_event_size, the size of the data provided + * pm_event_data, data buffer describing event + * Return values : + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer or this + * event doesn't exist. 
+ *   -EBUSY, tracing hasn't started yet
+ *******************************************************/
+int trace_raw_event(int pm_event_id, int pm_event_size, void* pm_event_data)
+{
+	trace_custom l_custom; /* Custom event handed to trace_event() */
+	struct custom_event_desc* p_event_desc; /* Cursor over the event description list */
+
+	/* Lock the table for reading */
+	read_lock(&custom_list_lock);
+
+	/* Go through the event description list, stopping on the matching Id */
+	for(p_event_desc = custom_events->next;
+	    p_event_desc != custom_events;
+	    p_event_desc = p_event_desc->next)
+		if(p_event_desc->event.id == pm_event_id)
+			break;
+
+	/* Unlock the table for reading; the entry is only compared below, never dereferenced */
+	read_unlock(&custom_list_lock);
+
+	/* If we haven't found anything (the walk came back to custom_events) */
+	if(p_event_desc == custom_events)
+		/* No such thing */
+		return -ENOMEDIUM;
+
+	/* Set custom event Id */
+	l_custom.id = pm_event_id;
+
+	/* Set the data size, clamped to 0..CUSTOM_EVENT_MAX_SIZE: a negative size must not wrap to a huge uint32_t */
+	if(pm_event_size <= CUSTOM_EVENT_MAX_SIZE)
+		l_custom.data_size = (uint32_t) ((pm_event_size > 0) ? pm_event_size : 0);
+	else
+		l_custom.data_size = (uint32_t) CUSTOM_EVENT_MAX_SIZE;
+
+	/* Set the pointer to the event data (caller's buffer, not copied here) */
+	l_custom.data = pm_event_data;
+
+	/* Log the custom event and pass trace_event()'s result back */
+	return trace_event(TRACE_EV_CUSTOM, &l_custom);
+}
+
+/*******************************************************
+ * Trace an event (tracer first, then native-event callbacks)
+ * Parameters :
+ *   pm_event_id, the event's ID (check out trace.h)
+ *   pm_event_struct, the structure describing the event
+ * Return values :
+ *   -ENOMEDIUM, there isn't a registered tracer
+ *   otherwise the tracer's own result, documented as 0 when
+ *   all is OK and -EBUSY when tracing hasn't started yet
+ *******************************************************/
+int trace_event(uint8_t pm_event_id,
+		void* pm_event_struct)
+{
+	int l_ret_value; /* The return value */
+	struct trace_callback_table_entry* p_tct_entry; /* Pointer to trace callback table entry */
+
+	/* Lock registration variables */
+	read_lock(&tracer_register_lock);
+
+	/* Is there a tracer registered */
+	if(tracer_registered != 1)
+		l_ret_value = -ENOMEDIUM;
+	else
+		/* Call the tracer */
+		l_ret_value = tracer->trace(pm_event_id, pm_event_struct);
+
+	/* Unlock registration variables */
+	read_unlock(&tracer_register_lock);
+
+	/* Is this a native event; Id 0 is excluded because the table is indexed by Id - 1 */
+	if((pm_event_id >= 1) && (pm_event_id <= TRACE_EV_MAX))
+	{
+		/* Are there any callbacks to call. NOTE(review): no lock is held from here on -- confirm callback (un)registration is serialized against this walk */
+		if(trace_callback_table[pm_event_id - 1].next != NULL)
+		{
+			/* Call all the callbacks linked to this event, whether or not a tracer is registered */
+			for(p_tct_entry = trace_callback_table[pm_event_id - 1].next;
+			    p_tct_entry != NULL;
+			    p_tct_entry = p_tct_entry->next)
+				p_tct_entry->callback(pm_event_id, pm_event_struct);
+		}
+	}
+
+	/* Give the return value (callbacks do not influence it) */
+	return l_ret_value;
+}
+
+/*******************************************************
+ * Initialize trace facility
+ * Parameters :
+ *   NONE
+ * Return values :
+ *   0, always
+ *******************************************************/
+static int __init trace_init(void)
+{
+	int i; /* Generic index */
+
+	/* Initialize callback table: one empty slot per native event */
+	for(i = 0; i < TRACE_EV_MAX; i++)
+	{
+		trace_callback_table[i].callback = NULL;
+		trace_callback_table[i].next = NULL;
+	}
+
+	/* Next event ID to be used: custom events are numbered right after the native ones */
+	next_event_id = TRACE_EV_MAX + 1;
+
+	/* Initialize custom events list as an empty circular list whose head points to itself */
+	custom_events = &custom_events_head;
+	custom_events->next = custom_events;
+	custom_events->prev = custom_events;
+
+	/* Everything is OK */
+	return 0;
+}
+
+module_init(trace_init);
+
+/* Export symbols so that they are visible from outside this file */
+EXPORT_SYMBOL(register_tracer);
+EXPORT_SYMBOL(unregister_tracer);
+EXPORT_SYMBOL(trace_set_config);
+EXPORT_SYMBOL(trace_get_config);
+EXPORT_SYMBOL(trace_register_callback);
+EXPORT_SYMBOL(trace_unregister_callback);
+EXPORT_SYMBOL(trace_create_event);
+EXPORT_SYMBOL(trace_create_owned_event);
+EXPORT_SYMBOL(trace_destroy_event);
+EXPORT_SYMBOL(trace_destroy_owners_events);
+EXPORT_SYMBOL(trace_reregister_custom_events);
+EXPORT_SYMBOL(trace_std_formatted_event);
+EXPORT_SYMBOL(trace_raw_event);
+EXPORT_SYMBOL(trace_event);
diff -urNp linux-2.4.7.SuSE-orig/mm/filemap.c linux-2.4.7.SuSE/mm/filemap.c
--- linux-2.4.7.SuSE-orig/mm/filemap.c Tue Oct 30 16:43:41 2001 +++ linux-2.4.7.SuSE/mm/filemap.c Mon Jun 24 11:52:48 2002 @@ -24,6 +24,8 @@ #include #include +#include + #include #include #include @@ -758,10 +760,12 @@ void ___wait_on_page(struct page *page) set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!PageLocked(page)) break; + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_WAIT_START, 0); sync_page(page); schedule(); } while (PageLocked(page)); tsk->state = TASK_RUNNING; + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_WAIT_END, 0); remove_wait_queue(&page->wait, &wait); } diff -urNp linux-2.4.7.SuSE-orig/mm/memory.c linux-2.4.7.SuSE/mm/memory.c --- linux-2.4.7.SuSE-orig/mm/memory.c Tue Oct 30 16:43:37 2001 +++ linux-2.4.7.SuSE/mm/memory.c Mon Jun 24 11:52:48 2002 @@ -45,6 +45,8 @@ #include #include +#include + #include #include @@ -1087,6 +1089,7 @@ static int do_swap_page(struct mm_struct spin_unlock(&mm->page_table_lock); page = lookup_swap_cache(entry); if (!page) { + TRACE_MEMORY(TRACE_EV_MEMORY_SWAP_IN, address); lock_kernel(); swapin_readahead(entry); page = read_swap_cache_async(entry); diff -urNp linux-2.4.7.SuSE-orig/mm/page_alloc.c linux-2.4.7.SuSE/mm/page_alloc.c --- linux-2.4.7.SuSE-orig/mm/page_alloc.c Tue Oct 30 16:43:38 2001 +++ linux-2.4.7.SuSE/mm/page_alloc.c Mon Jun 24 11:52:48 2002 @@ -18,6 +18,8 @@ #include #include +#include + int nr_swap_pages; int nr_active_pages; int nr_inactive_dirty_pages; @@ -87,6 +89,7 @@ static void __free_pages_ok (struct page BUG(); if (PageInactiveClean(page)) BUG(); + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_FREE, order); page->flags &= ~((1<age = PAGE_AGE_START; @@ -516,6 +519,7 @@ unsigned long __get_free_pages(int gfp_m page = alloc_pages(gfp_mask, order); if (!page) return 0; + TRACE_MEMORY(TRACE_EV_MEMORY_PAGE_ALLOC, order); return (unsigned long) page_address(page); } diff -urNp linux-2.4.7.SuSE-orig/mm/swap_state.c linux-2.4.7.SuSE/mm/swap_state.c --- linux-2.4.7.SuSE-orig/mm/swap_state.c Wed Jul 18 18:18:15 2001 +++ 
linux-2.4.7.SuSE/mm/swap_state.c Mon Jun 24 11:52:48 2002 @@ -15,6 +15,8 @@ #include #include +#include + #include /* @@ -34,6 +36,7 @@ static int swap_writepage(struct page *p return 0; in_use: + TRACE_MEMORY(TRACE_EV_MEMORY_SWAP_OUT, (unsigned long) page); rw_swap_page(WRITE, page); return 0; } diff -urNp linux-2.4.7.SuSE-orig/net/core/dev.c linux-2.4.7.SuSE/net/core/dev.c --- linux-2.4.7.SuSE-orig/net/core/dev.c Tue Oct 30 16:43:46 2001 +++ linux-2.4.7.SuSE/net/core/dev.c Mon Jun 24 11:52:48 2002 @@ -103,6 +103,9 @@ #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO) #include /* Note : will define WIRELESS_EXT */ #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ + +#include + #ifdef CONFIG_PLIP extern int plip_init(void); #endif @@ -987,6 +990,8 @@ int dev_queue_xmit(struct sk_buff *skb) return -ENOMEM; } + TRACE_NETWORK(TRACE_EV_NETWORK_PACKET_OUT, skb->protocol); + /* Grab device queue */ spin_lock_bh(&dev->queue_lock); q = dev->qdisc; @@ -1421,6 +1426,8 @@ static void net_rx_action(struct softirq skb_bond(skb); rx_dev = skb->dev; + + TRACE_NETWORK(TRACE_EV_NETWORK_PACKET_IN, skb->protocol); #ifdef CONFIG_NET_FASTROUTE if (skb->pkt_type == PACKET_FASTROUTE) { diff -urNp linux-2.4.7.SuSE-orig/net/socket.c linux-2.4.7.SuSE/net/socket.c --- linux-2.4.7.SuSE-orig/net/socket.c Tue Oct 30 16:43:46 2001 +++ linux-2.4.7.SuSE/net/socket.c Mon Jun 24 11:52:48 2002 @@ -73,6 +73,8 @@ #include #include +#include + #if defined(CONFIG_KMOD) && defined(CONFIG_NET) #include #endif @@ -518,6 +520,8 @@ int sock_sendmsg(struct socket *sock, st int err; struct scm_cookie scm; + TRACE_SOCKET(TRACE_EV_SOCKET_SEND, sock->type, size); + err = scm_send(sock, msg, &scm); if (err >= 0) { err = sock->ops->sendmsg(sock, msg, size, &scm); @@ -532,6 +536,8 @@ int sock_recvmsg(struct socket *sock, st memset(&scm, 0, sizeof(scm)); + TRACE_SOCKET(TRACE_EV_SOCKET_RECEIVE, sock->type, size); + size = sock->ops->recvmsg(sock, msg, size, flags, &scm); if (size >= 0) 
scm_recv(sock, msg, &scm, flags); @@ -936,6 +942,8 @@ asmlinkage long sys_socket(int family, i if (retval < 0) goto out_release; + TRACE_SOCKET(TRACE_EV_SOCKET_CREATE, retval, type); + out: /* It may be already another descriptor 8) Not kernel problem. */ return retval; @@ -1572,6 +1580,8 @@ asmlinkage long sys_socketcall(int call, a0=a[0]; a1=a[1]; + + TRACE_SOCKET(TRACE_EV_SOCKET_CALL, call, a0); switch(call) {