Thursday, May 1, 2003

When Daemons Attack: Debugging Linux Applications

Notes from a talk I gave to the Brandeis University Computer Operators Group.

  • Call tracing
    • System call tracing — strace (Linux), truss (BSD), strace for NT
    • Library call tracing — ltrace (Linux/BSD)
  • Trace example (ltrace -S)
    • First, the program is linked and loaded…
 
 SYS_uname(0xbffff600)                            = 0
 SYS_brk(NULL)                                    = 0x0804c000
 SYS_open("/etc/ld.so.preload", 0, 010000210574)  = -2
 SYS_open("/etc/ld.so.cache", 0, 00)              = 3
 SYS_fstat64(3, 0xbfffeda0, 0x400114ac, 0, 0x400115e4) = 0
 SYS_mmap(0xbfffed70, 0, 0x400114ac, 3, 0x40011594) = 0x40012000
 SYS_close(3)                                     = 0
 SYS_open("/lib/libc.so.6", 0, 027777767210)      = 3
 SYS_read(3, "\177ELF\001\001\001", 1024)         = 1024
 SYS_fstat64(3, 0xbfffedf0, 0x400114ac, 0, 0x400115e4) = 0
 SYS_mmap(0xbfffecd0, 0x40011d30, 0x400114ac, 2, 0xbfffecf0) = 0x40029000
 SYS_mprotect(0x40131000, 32292, 0, 2, 0xbfffecf0) = 0
 SYS_mmap(0xbfffecd0, 0x40011d30, 0x400114ac, 0x40011d30, 0xbfffed08) = 0x40131000
 SYS_mmap(0xbfffecd0, 0, 0x400114ac, 0x40137000, 0xbfffed08) = 0x40137000
 SYS_close(3)                                     = 0
 SYS_mmap(0xbffff360, 8, 0x400114ac, 4096, 112)   = 0x40139000
 SYS_munmap(0x40012000, 92718)                    = 0
 
  • We’ve finally reached the program’s main statement.
 
 __libc_start_main(0x08048b28, 2, 0xbffffa94, 0x08048760, 0x08049b40  
 setlocale(6, "")                                 = "C"
 bindtextdomain("coreutils", "/usr/share/locale"  
 SYS_brk(NULL)                                    = 0x0804c000
 SYS_brk(0x0804d000)                              = 0x0804d000
 SYS_brk(NULL)                                    = 0x0804d000
 <... bindtextdomain resumed> )                   = "/usr/share/locale"
 textdomain("coreutils")                          = "coreutils"
 __cxa_atexit(0x08048f34, 0, 0, 13, 0x08049edd)   = 0
 getenv("POSIXLY_CORRECT")                        = NULL
 getopt_long(2, 0xbffffa94, "+", 0x0804a080, NULL) = -1
 fputs_unlocked(0xbffffbe2, 0x40132c40, 0x080489f8, 20304, 0xbffffa50  
 
  • strace shows this next line as fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 4), ...}) = 0 — that’s why you should try strace before going to ltrace, it’s prettier.
 
 SYS_fstat64(1, 0xbffff8d0, 0x40135f60, 0x40136560, 0) = 0
 
  • This is libc’s fputs allocating a buffer.
 
 SYS_mmap(0xbffff8b0, 0xbffff8d0, 0x40135f60, 0x40132c40, 4096) = 0x40012000
 <... fputs_unlocked resumed> )                   = 1
 exit(0  
 __fpending(0x40132c40, 0x400114ac, 0x400116d8, 0x08048728, 0x40135f60) = 13
 fclose(0x40132c40  
 SYS_write(1, "Hello, world\n", 13Hello, world
 )               = 13
 
  • echo explicitly closes stdout for some reason.
 
 SYS_close(1)                                     = 0
 SYS_munmap(0x40012000, 4096)                     = 0
 <... fclose resumed> )                           = 0
 SYS_exit_group(0  
 +++ exited (status 0) +++
 
  • Example: Finding errors with strace
    • foo: No such file or directory
    • Negative return values usually indicate errors, so try grepping for them
    • If the program is printing an error message, start at where that error message is printed and work backwards
 
 open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
 fstat64(3, {st_mode=S_IFREG|0644, st_size=1384816, ...}) = 0
 mmap2(NULL, 1384816, PROT_READ, MAP_PRIVATE, 3, 0) = 0x4019a000
 close(3)                                = 0
 ioctl(1, SNDCTL_TMR_TIMEBASE, {B38400 opost isig icanon echo ...}) = 0
 ioctl(1, TIOCGWINSZ, {ws_row=57, ws_col=158, ws_xpixel=1584, ws_ypixel=1144}) = 0
 brk(0)                                  = 0x805a000
 brk(0x805d000)                          = 0x805d000
 stat64("foo", 0x8059a5c)                = -1 ENOENT (No such file or directory)
 lstat64("foo", 0x8059a5c)               = -1 ENOENT (No such file or directory) 
 write(2, "ls: ", 4ls: )                     = 4
 write(2, "foo", 3foo)                      = 3
 open("/usr/share/locale/locale.alias", O_RDONLY) = 3
 fstat64(3, {st_mode=S_IFREG|0644, st_size=2627, ...}) = 0
 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40012000
 read(3, "# Locale name alias data base.\n#"..., 4096) = 2627
 brk(0)                                  = 0x805d000
 brk(0x805e000)                          = 0x805e000
 read(3, "", 4096)                       = 0
 close(3)                                = 0
 munmap(0x40012000, 4096)                = 0
 open("/usr/share/locale/en_US.ISO-8859-1/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
 open("/usr/share/locale/en_US.iso88591/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
 open("/usr/share/locale/en_US/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
 open("/usr/share/locale/en.ISO-8859-1/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
 open("/usr/share/locale/en.iso88591/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
 open("/usr/share/locale/en/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
 write(2, ": No such file or directory", 27: No such file or directory) = 27 
 write(2, "\n", 1
 )                       = 1
 exit_group(1)                
 
  • Source-level Debugging (This is why having source code is good…) Useful gdb commands:
    • bt (backtrace) — Shows the stack of function calls
    • b (breakpoint) — Sets a breakpoint
    • print expr
  • Example: Using gdb
 
 /* Silly C program to print numbers */ 
 
 #include <stdio.h> 
 #include <stdlib.h> 
 
 void fillarray(int *array, int size) { 
     int i;
 
     for(i = 0; i &lt; size; i++) array[i] = i;
 } 
 
 int main(int argc, char *argv[]) { 
     int *numarray, i;
 
     numarray = malloc(atoi(argv[1]));
     fillarray(numarray, sizeof(numarray));
     for(i = 0; i &lt; sizeof(numarray); i++) printf("%d\n", numarray[i]);
 
     return 0;
 } 
 
 
 bash$ printnums
 Segmentation fault
 bash$ gcc -ggdb -o printnums printnums.c
 bash$ gdb printnums
 (gdb) r
 Starting program: /home/matthewg/printnums 
 
 Program received signal SIGSEGV, Segmentation fault.
 0x40052a6e in __strtol_internal () from /lib/libc.so.6
 (gdb) bt
 #0  0x40052a6e in __strtol_internal () from /lib/libc.so.6
 #1  0x40050849 in atoi () from /lib/libc.so.6
 #2  0x080483e3 in main (argc=1, argv=0xbffffa04) at printnums.c:15
 (gdb) up
 #1  0x40050849 in atoi () from /lib/libc.so.6
 (gdb) up
 #2  0x080483e3 in main (argc=1, argv=0xbffffa04) at printnums.c:15
 15        numarray = malloc(atoi(argv[1]));
 (gdb) print argv[1]
 (gdb) print argc
 $4 = 1
 (gdb) quit
 A debugging session is active.
 Do you still want to close the debugger?(y or n) y
 
  • The program’s expecting a command-line argument, we forgot to give it one or check for that condition.
 
 Continuing.
 0
 1
 2
 3
 
 Program exited normally.
 (gdb) b 20
 Breakpoint 2 at 0x8048451: file printnums.c, line 20.
 (gdb) r
 Starting program: /home/matthewg/printnums 1
 
 Breakpoint 2, main (argc=2, argv=0xbffff9f4) at printnums.c:20
 20        numarray = malloc(atoi(argv[1]));
 (gdb) s
 21        fillarray(numarray, sizeof(numarray));
 (gdb) print sizeof(numarray)
 $7 = 4
 (gdb) print argv[1]
 $8 = 0xbffffb64 "1"
 (gdb) c
 Continuing.
 
 Breakpoint 1, fillarray (array=0x804a008, size=4) at printnums.c:9
 9        for(i = 0; i < size; i++) array[i] = i;
 (gdb) clear
 Deleted breakpoint 1 
 (gdb) c
 Continuing.
 0
 1
 2
 3
 
 Program exited normally.
 (gdb) quit
 
  • sizeof(foo) doesn’t do what the programmer thought it did
    • Replacing Library Functions fputs.c source:
 
 #define _GNU_SOURCE 
 #include <stdio.h> 
 #include <dlfcn.h> 
 
 int fputs_unlocked(const char *s, FILE *stream) { 
         int (*orig_fputs)(const char *, FILE *);
         int retval;
 
         orig_fputs = dlsym(RTLD_NEXT, "fputs_unlocked");
 
         printf("Doing fputs...\n");
         retval = orig_fputs(s, stream);
         printf("fputs returning %d.\n", retval);
 
         return retval;
 } 
 
 
 bash$ gcc -shared -ldl -o fputs.so fputs.c
 bash$ LD_PRELOAD=./fputs.so /bin/echo "Hello, world"
 Doing fputs...
 Hello, worldfputs returning 1.
 
 bash$ 
 
  • Why is there a line break after “fputs returning 1.” and none between that and “Hello, world” ?