When Daemons Attack: Debugging Linux Applications
Notes from a talk I gave to the Brandeis University Computer Operators Group.
- Call tracing
- System call tracing — strace (Linux), truss (BSD), strace for NT
- Library call tracing — ltrace (Linux/BSD)
- Trace example (
ltrace -S
)- First, the program is linked and loaded…
SYS_uname(0xbffff600) = 0 SYS_brk(NULL) = 0x0804c000 SYS_open("/etc/ld.so.preload", 0, 010000210574) = -2 SYS_open("/etc/ld.so.cache", 0, 00) = 3 SYS_fstat64(3, 0xbfffeda0, 0x400114ac, 0, 0x400115e4) = 0 SYS_mmap(0xbfffed70, 0, 0x400114ac, 3, 0x40011594) = 0x40012000 SYS_close(3) = 0 SYS_open("/lib/libc.so.6", 0, 027777767210) = 3 SYS_read(3, "\177ELF\001\001\001", 1024) = 1024 SYS_fstat64(3, 0xbfffedf0, 0x400114ac, 0, 0x400115e4) = 0 SYS_mmap(0xbfffecd0, 0x40011d30, 0x400114ac, 2, 0xbfffecf0) = 0x40029000 SYS_mprotect(0x40131000, 32292, 0, 2, 0xbfffecf0) = 0 SYS_mmap(0xbfffecd0, 0x40011d30, 0x400114ac, 0x40011d30, 0xbfffed08) = 0x40131000 SYS_mmap(0xbfffecd0, 0, 0x400114ac, 0x40137000, 0xbfffed08) = 0x40137000 SYS_close(3) = 0 SYS_mmap(0xbffff360, 8, 0x400114ac, 4096, 112) = 0x40139000 SYS_munmap(0x40012000, 92718) = 0
- We’ve finally reached the program’s main statement.
__libc_start_main(0x08048b28, 2, 0xbffffa94, 0x08048760, 0x08049b40setlocale(6, "") = "C" bindtextdomain("coreutils", "/usr/share/locale" SYS_brk(NULL) = 0x0804c000 SYS_brk(0x0804d000) = 0x0804d000 SYS_brk(NULL) = 0x0804d000 <... bindtextdomain resumed> ) = "/usr/share/locale" textdomain("coreutils") = "coreutils" __cxa_atexit(0x08048f34, 0, 0, 13, 0x08049edd) = 0 getenv("POSIXLY_CORRECT") = NULL getopt_long(2, 0xbffffa94, "+", 0x0804a080, NULL) = -1 fputs_unlocked(0xbffffbe2, 0x40132c40, 0x080489f8, 20304, 0xbffffa50
- strace shows this next line as
fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 4), ...}) = 0
— that’s why you should try strace before going to ltrace, it’s prettier.
SYS_fstat64(1, 0xbffff8d0, 0x40135f60, 0x40136560, 0) = 0
- This is libc’s fputs allocating a buffer.
SYS_mmap(0xbffff8b0, 0xbffff8d0, 0x40135f60, 0x40132c40, 4096) = 0x40012000 <... fputs_unlocked resumed> ) = 1 exit(0__fpending(0x40132c40, 0x400114ac, 0x400116d8, 0x08048728, 0x40135f60) = 13 fclose(0x40132c40 SYS_write(1, "Hello, world\n", 13Hello, world ) = 13
- echo explicitly closes stdout for some reason.
SYS_close(1) = 0 SYS_munmap(0x40012000, 4096) = 0 <... fclose resumed> ) = 0 SYS_exit_group(0+++ exited (status 0) +++
- Example: Finding errors with
strace
foo: No such file or directory
- Negative return values usually indicate errors, so try grepping for them
- If the program is printing an error message, start at where that error message is printed and work backwards
open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=1384816, ...}) = 0 mmap2(NULL, 1384816, PROT_READ, MAP_PRIVATE, 3, 0) = 0x4019a000 close(3) = 0 ioctl(1, SNDCTL_TMR_TIMEBASE, {B38400 opost isig icanon echo ...}) = 0 ioctl(1, TIOCGWINSZ, {ws_row=57, ws_col=158, ws_xpixel=1584, ws_ypixel=1144}) = 0 brk(0) = 0x805a000 brk(0x805d000) = 0x805d000 stat64("foo", 0x8059a5c) = -1 ENOENT (No such file or directory) lstat64("foo", 0x8059a5c) = -1 ENOENT (No such file or directory) write(2, "ls: ", 4ls: ) = 4 write(2, "foo", 3foo) = 3 open("/usr/share/locale/locale.alias", O_RDONLY) = 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=2627, ...}) = 0 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40012000 read(3, "# Locale name alias data base.\n#"..., 4096) = 2627 brk(0) = 0x805d000 brk(0x805e000) = 0x805e000 read(3, "", 4096) = 0 close(3) = 0 munmap(0x40012000, 4096) = 0 open("/usr/share/locale/en_US.ISO-8859-1/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en_US.iso88591/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en_US/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en.ISO-8859-1/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en.iso88591/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory) write(2, ": No such file or directory", 27: No such file or directory) = 27 write(2, "\n", 1 ) = 1 exit_group(1)
- Source-level Debugging
(This is why having source code is good…) Useful gdb commands:
bt
(backtrace) — Shows the stack of function callsb
(breakpoint) — Sets a breakpointprint expr
- Example: Using
gdb
/* Silly C program to print numbers */ #include <stdio.h> #include <stdlib.h> void fillarray(int *array, int size) { int i; for(i = 0; i < size; i++) array[i] = i; } int main(int argc, char *argv[]) { int *numarray, i; numarray = malloc(atoi(argv[1])); fillarray(numarray, sizeof(numarray)); for(i = 0; i < sizeof(numarray); i++) printf("%d\n", numarray[i]); return 0; }
bash$ printnums Segmentation fault bash$ gcc -ggdb -o printnums printnums.c bash$ gdb printnums (gdb) r Starting program: /home/matthewg/printnums Program received signal SIGSEGV, Segmentation fault. 0x40052a6e in __strtol_internal () from /lib/libc.so.6 (gdb) bt #0 0x40052a6e in __strtol_internal () from /lib/libc.so.6 #1 0x40050849 in atoi () from /lib/libc.so.6 #2 0x080483e3 in main (argc=1, argv=0xbffffa04) at printnums.c:15 (gdb) up #1 0x40050849 in atoi () from /lib/libc.so.6 (gdb) up #2 0x080483e3 in main (argc=1, argv=0xbffffa04) at printnums.c:15 15 numarray = malloc(atoi(argv[1])); (gdb) print argv[1] (gdb) print argc $4 = 1 (gdb) quit A debugging session is active. Do you still want to close the debugger?(y or n) y
- The program’s expecting a command-line argument, we forgot to give it one or check for that condition.
Continuing. 0 1 2 3 Program exited normally. (gdb) b 20 Breakpoint 2 at 0x8048451: file printnums.c, line 20. (gdb) r Starting program: /home/matthewg/printnums 1 Breakpoint 2, main (argc=2, argv=0xbffff9f4) at printnums.c:20 20 numarray = malloc(atoi(argv[1])); (gdb) s 21 fillarray(numarray, sizeof(numarray)); (gdb) print sizeof(numarray) $7 = 4 (gdb) print argv[1] $8 = 0xbffffb64 "1" (gdb) c Continuing. Breakpoint 1, fillarray (array=0x804a008, size=4) at printnums.c:9 9 for(i = 0; i < size; i++) array[i] = i; (gdb) clear Deleted breakpoint 1 (gdb) c Continuing. 0 1 2 3 Program exited normally. (gdb) quit
sizeof(foo)
doesn’t do what the programmer thought it did- Replacing Library Functions
fputs.c
source:
- Replacing Library Functions
#define _GNU_SOURCE #include <stdio.h> #include <dlfcn.h> int fputs_unlocked(const char *s, FILE *stream) { int (*orig_fputs)(const char *, FILE *); int retval; orig_fputs = dlsym(RTLD_NEXT, "fputs_unlocked"); printf("Doing fputs...\n"); retval = orig_fputs(s, stream); printf("fputs returning %d.\n", retval); return retval; }
bash$ gcc -shared -ldl -o fputs.so fputs.c bash$ LD_PRELOAD=./fputs.so /bin/echo "Hello, world" Doing fputs... Hello, worldfputs returning 1. bash$
- Why is there a line break after “fputs returning 1.” and none between that and “Hello, world” ?