Sholck

不积跬步,无以至千里.不积小流,无以成江海

0%

gdb调试linux内核&驱动模块

gdb调试linux内核&驱动模块

在linux学习过程中,需要针对内核和动态模块进行调试,最常用的方法是gdb。但是linux gdb文档中描述得不够详细,我在学习调试的过程中碰到了不少坑,在此做如下记录。

调试案例:

源代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
///test-A.c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/sched.h>
//#include <linux/init_task.h>

/* Head of the raw notifier chain defined by this module. */
static RAW_NOTIFIER_HEAD(notifier_test);
/* Set to `current` by the init function so its pid can be logged. */
static struct task_struct *p;

/* Two exported integers: test1 has no initializer, test2 starts at 2. */
int test1;
EXPORT_SYMBOL(test1);
int test2=2;
EXPORT_SYMBOL(test2);

/*
 * Run every callback registered on notifier_test.
 *
 * @action: event code handed to each callback
 * @data:   opaque pointer handed to each callback
 *
 * Returns whatever raw_notifier_call_chain() returns.
 *
 * Deliberately not static: the function is exported with EXPORT_SYMBOL,
 * so it needs external linkage (modpost complains about a static
 * EXPORT_SYMBOL).
 */
int call_test_notifier(unsigned long action, void *data)
{
	return raw_notifier_call_chain(&notifier_test, action, data);
}

EXPORT_SYMBOL(call_test_notifier);

/*
 * Add a notifier block to notifier_test.
 *
 * @nh: notifier block to link into the chain
 *
 * Returns whatever raw_notifier_chain_register() returns.
 *
 * Deliberately not static: the function is exported with EXPORT_SYMBOL,
 * so it needs external linkage (modpost complains about a static
 * EXPORT_SYMBOL).
 */
int register_test_notifier(struct notifier_block *nh)
{
	return raw_notifier_chain_register(&notifier_test, nh);
}

EXPORT_SYMBOL(register_test_notifier);

/*
 * Module entry point: remember the task that is loading the module and
 * log its pid. Always reports success.
 */
static int __init notifier_test_A_init(void)
{
	p = current;
	printk("enter %s pid is %d\n", __func__, p->pid);

	return 0;
}

/* Module exit point: only logs that the function ran. */
static void __exit notifier_test_A_exit(void)
{
	printk("exit %s \n", __func__);
}

/* Hook the init/exit functions into module load and unload. */
module_init(notifier_test_A_init);
module_exit(notifier_test_A_exit);

MODULE_LICENSE("GPL");
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
//test-B.c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include "test.h"

/* Defined and exported by test-A.c. */
extern int register_test_notifier(struct notifier_block *nb);
/* Set to `current` by the init function so its pid can be logged. */
static struct task_struct *p;
static int notifier_test_deal_0(struct notifier_block *nb, unsigned long action, void *val) {
printk("enter %s \n", __func__);
switch(action) {
case FIRST_EVENT:
printk("action is 0x%lu in func is %s\n", action, __func__);
break;
case SECOND_EVENT:
printk("aciotn is 0x%lu in func is %s\n", action, __func__);
break;
default:
printk("nothing to do\n");
break;
}
return 0;
}

static int notifier_test_deal_1(struct notifier_block *nb, unsigned long action, void *val) {
printk("enter %s \n", __func__);
switch(action) {
case FIRST_EVENT:
printk("action is 0x%lu in func is %s\n", action, __func__);
break;
case SECOND_EVENT:
printk("aciotn is 0x%lu in func is %s\n", action, __func__);
break;
default:
printk("nothing to do\n");
break;
}
return 0;
}

/* Notifier block that dispatches to notifier_test_deal_0, priority 0. */
struct notifier_block test_nb_0 = {
	.notifier_call	= notifier_test_deal_0,
	.next		= NULL,
	.priority	= 0,
};

/* Notifier block that dispatches to notifier_test_deal_1, priority 1. */
struct notifier_block test_nb_1 = {
	.notifier_call	= notifier_test_deal_1,
	.next		= NULL,
	.priority	= 1,
};

/*
 * Module entry point: log the loading task's pid, then register test_nb_0
 * followed by test_nb_1 through register_test_notifier().
 *
 * Returns the first negative registration result, otherwise the result of
 * the second registration.
 */
static int __init notifier_test_B_init(void)
{
	int err;

	p = current;
	printk("enter %s pid is %d\n", __func__, p->pid);

	err = register_test_notifier(&test_nb_0);
	if (err < 0)
		return err;

	/*
	 * NOTE(review): if this second registration fails, test_nb_0 stays
	 * on the chain; the code shown has no unregister helper to undo it.
	 */
	return register_test_notifier(&test_nb_1);
}

/*
 * Module exit point: only logs that the function ran.
 *
 * NOTE(review): test_nb_0 and test_nb_1 are not removed from the chain
 * here, so the chain would keep pointing at them after unload. Confirm
 * whether an unregister helper should be exported next to
 * register_test_notifier().
 */
static void __exit notifier_test_B_exit(void)
{
	printk("exit %s \n", __func__);
}

/* Hook the init/exit functions into module load and unload. */
module_init(notifier_test_B_init);
module_exit(notifier_test_B_exit);

MODULE_LICENSE("GPL");
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
//test-C.c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/sched.h>

#include "test.h"
/* Set to `current` by the init function so its pid can be logged. */
static struct task_struct *p;
/* Defined and exported by test-A.c. */
extern int call_test_notifier(unsigned long action, void *data);

/*
 * Module entry point: log the loading task's pid, then send FIRST_EVENT
 * (payload "no use") down the chain via call_test_notifier().
 *
 * Returns 0 or a negative errno. The chain result is a NOTIFY_* code, not
 * an errno, so it is translated with notifier_to_errno() before being
 * used as the init status.
 */
static int __init notifier_test_C_init(void)
{
	int ret;

	p = current;
	printk("enter %s pid is %d\n", __func__, p->pid);

	ret = call_test_notifier(FIRST_EVENT, "no use");

	return notifier_to_errno(ret);
}

/* Module exit point: only logs that the function ran. */
static void __exit notifier_test_C_exit(void)
{
	printk("exit %s \n", __func__);
}

/* Hook the init/exit functions into module load and unload. */
module_init(notifier_test_C_init);
module_exit(notifier_test_C_exit);

MODULE_LICENSE("GPL");

//test.h
#ifndef _TEST_H_
#define _TEST_H_

/*
 * Event codes used as the "action" value on the test notifier chain.
 * Each code is an offset from EVENT_BASE.
 */
enum {
	EVENT_BASE   = 0x0,
	FIRST_EVENT  = EVENT_BASE + 1,
	SECOND_EVENT = EVENT_BASE + 2,
};

#endif /* _TEST_H_ */
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
//Makefile
# Out-of-tree build of the three notifier demo modules.
MODULE_SRCS += test-A.c test-B.c test-C.c

# Build products removed by "make clean".
MODULE_OBJS += $(wildcard *.o) \
	       $(wildcard *.ko) \
	       $(wildcard *.mod) \
	       $(wildcard *.mod.c)

OUT_FILES += modules.order \
	     Module.symvers

#$(info "OBJS is $(MODULE_OBJS)")

# Each object is built as its own module: test-A.ko, test-B.ko, test-C.ko.
obj-m += test-A.o test-B.o test-C.o

# The path of kernel code (override with: make KDIR=/path/to/linux)
KDIR ?= /github/linux
PWD ?= $(shell pwd)

.PHONY: build kernel_modules clean

build: kernel_modules

# Recipe lines must start with a TAB. $(MAKE) passes flags such as -j on
# to the kernel build.
kernel_modules:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	rm -rf $(MODULE_OBJS)
	rm -rf $(OUT_FILES)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
//README
========================================== |
test-A.c |
================================ |
chain: notifier_test |
---------- |
| | |
| head -------------- |
| | | |
----------- | |
=============================== | |
test_nb_1 <--------| |
-------------- |
| | cb |
| notifier_call ----------------------------->test-B.c:notifier_test_deal_1
| | |
-------------- |
| | |
| next -------------- |
| | | |
------------- | |
| | | |
| priority | | <-------------test-B module inserts nodes by calling the EXPORT_SYMBOL function
| | | |
------------- | |
================================= | |
test_nb_0 <-----------| |
-------------- |
| | cb |
| notifier_call ----------------------------->test-B.c:notifier_test_deal_0
| | |
-------------- |
| | |
| next=NULL | |
| | <-------------test-C module produces the event (chain entry)
------------- |
| | |
| priority | |
| | |
------------- |
=================================
==========================================

内核调试

实践

为了防止通过target remote:1234连接到虚拟机时断点已经错过,应该先在gdb中设置断点,再去attach(gdb能触发的断点最早是什么时候?)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
➜  x86-busybox-rootfs gdb vmlinux
(gdb) b start_kernel
Breakpoint 1 at 0xffffffff82d23c7c: file init/main.c, line 929.
(gdb) info b
Num Type Disp Enb Address What
1 breakpoint keep y 0xffffffff82d23c7c in start_kernel at init/main.c:929
(gdb) target remote:1234
Remote debugging using :1234
0x00000000034005a4 in ?? ()
(gdb) bt
#0 0x00000000034005a4 in ?? ()
#1 0x00000000033ffb02 in ?? ()
#2 0x00000000000001ff in ?? ()
#3 0x000000000000003f in ?? ()
#4 0x0000000001000000 in ?? ()
#5 0x0000000000000000 in ?? ()
(gdb) c
Continuing.

Thread 1 hit Breakpoint 1, start_kernel () at init/main.c:929
929 {
(gdb)

在attach过程中去启动虚拟机

1
➜  initramfs qemu-system-x86_64 -m 1024  -smp 2  -hda rootfs.img  -kernel /github/busybox/initramfs/x86-busybox-rootfs/vmlinuz-5.17.0+  -s -append "root=/dev/sda rdinit=init crashkernel=128M console=ttyS0 rw nokaslr" -nographic

注意

针对gdb调试,qemu命令需要额外增加如下两个选项:

  1. -s 让qemu在tcp 1234端口上开启gdbserver(等价于-gdb tcp::1234);不加该选项,target remote:1234会连接超时
  2. nokaslr 防止内核地址随机化,另外一种方法是取消 “Randomize the kernel memory sections” inside “Processor type and features”(没有实践,有时间研究)

如果触发不了断点,除了去检查是否有配置nokaslr,还需要检查bzImage和vmlinux是否为同一版本,可以通过symbol去比较

1
2
3
4
5
//开启nokaslr后,这里的地址应与info b显示的地址一致(下面这份输出只有低位d23c7c一致、高位不同,疑似取自一次未带nokaslr的启动);/proc/kallsyms既包含编译(链接)时确定的内核symbol,也包含模块插入时加入的symbol
/ # cat /proc/kallsyms | grep -n "start_kernel"
114634:ffffffff9bd234c2 T x86_64_start_kernel
114660:ffffffff9bd23c7c T start_kernel
114797:ffffffff9bd2d423 T xen_start_kernel

模块调试

注意

在模块调试前,应注意以下几点:

  1. 保证make bzImage后再执行make modules,使编译动态模块时所用接口的symbol和内核的symbol保持一致,这些接口都是通过EXPORT_SYMBOL导出的
  2. 保证你需要设置的断点接口或者变量等通过EXPORT_SYMBOL导出,不然gdb找不到symbol无法调试。

导出的symbol可以通过objdump/nm等工具确定是否存在于对应section, 需要注意对应的section,之后如果调试需要我们要导入对应section的symbol

1
2
3
4
5
6
7
8
9
10
11
//eg file test-A.c
int test1;
EXPORT_SYMBOL(test1);
int test2=2;
EXPORT_SYMBOL(test2);

/* Exported below, so it needs external linkage: no "static". */
int call_test_notifier(unsigned long action, void *data)
{
	return raw_notifier_call_chain(&notifier_test, action, data);
}

EXPORT_SYMBOL(call_test_notifier);

objdump打印所有symbol

1
2
3
4
5
6
7
1-notifier git:(master) ✗ objdump  -t test-A.ko
...
0000000000000000 g O .bss 0000000000000004 test1
0000000000000000 g O .data 0000000000000004 test2 >>因为test2已经初始化
0000000000000000 l F .text 0000000000000017 call_test_notifier
0000000000000020 l F .text 0000000000000014 register_test_notifier
...

nm打印

1
2
3
4
5
1-notifier git:(master) ✗ nm test-A.ko
0000000000000000 B test1
0000000000000000 D test2
0000000000000000 t call_test_notifier
0000000000000020 t register_test_notifier

通过objdump和nm打印,可以确认call_test_notifier相对.text的偏移为0000000000000000, 而register_test_notifier相对.text的偏移为0000000000000020

实践

先启动内核再插入模块

1
2
3
4
5
6
7
8
9
10
11
➜  initramfs qemu-system-x86_64 -m 1024  -smp 2  -hda rootfs.img  -kernel /github/busybox/initramfs/x86-busybox-rootfs/vmlinuz-5.17.0+  -s  -append "root=/dev/sda rdinit=init crashkernel=128M console=ttyS0 rw nokaslr" -nographic

/ # ./init >>自己的脚本,挂载/proc和/sys

/ # cd /lib/modules/5.17.0\+/
/lib/modules/5.17.0+ # cat /proc/kallsyms | grep -n "call_test_notifier"

/lib/modules/5.17.0+ # insmod test-A.ko
[ 72.955738] test_A: loading out-of-tree module taints kernel.
[ 72.956627] test_A: module verification failed: signature and/or required key missing - tainting kernel
[ 72.960299] enter notifier_test_A_init pid is 103 >>模块插入成功,打印插入动作的pid

检查模块插入后在内存中的映射symbol地址

1
2
3
4
5
6
7
8
9
10
11
/lib/modules/5.17.0+ # cat /proc/kallsyms | grep -n "call_test_notifier"   >>发现已经有test-A 的symbol
127163:ffffffffc005903c r __kstrtab_call_test_notifier [test_A]
127164:ffffffffc005904f r __kstrtabns_call_test_notifier [test_A]
127165:ffffffffc0059024 r __ksymtab_call_test_notifier [test_A]
127166:ffffffffc0058000 T call_test_notifier [test_A]

/lib/modules/5.17.0+ # cat /proc/kallsyms | grep -n "register_test_notifier"
127167:ffffffffc0059050 r __kstrtab_register_test_notifier [test_A]
127168:ffffffffc0059067 r __kstrtabns_register_test_notifier [test_A]
127169:ffffffffc0059030 r __ksymtab_register_test_notifier [test_A]
127170:ffffffffc0058020 T register_test_notifier [test_A]

模块插入内核后的symbol地址是根据加载到内存中的模块section地址+偏移来计算的

1
2
/lib/modules/5.17.0+ # lsmod
test_A 16384 0 - Live 0xffffffffc0058000 (OE) >> .text的基地址

查看全部该模块在内核中的section地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/lib/modules/5.17.0+ # cd /sys/module/test_A/sections/
/sys/module/test_A/sections # ls -a
/sys/module/test_A/sections # ls -a >>重点关注.bss .data .text
. .orc_unwind_ip
.. .rodata
.bss .rodata.str1.1
.exit.text .strtab
.gnu.linkonce.this_module .symtab
.init.text .text
.note.Linux __ksymtab
.note.gnu.build-id __ksymtab_strings
.orc_unwind __mcount_loc

/sys/module/test_A/sections # cat .text
0xffffffffc0058000
/sys/module/test_A/sections # cat .bss
0xffffffffc005a380

获取到模块地址后启动gdb并导入模块symbol

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  x86-busybox-rootfs gdb vmlinux
(gdb) target remote:1234
Remote debugging using :1234
0xffffffff8fb1127b in ?? ()
(gdb) add-symbol-file /github/busybox/initramfs/x86-busybox-rootfs/test-A.ko -s .text 0xffffffffc0058000
add symbol table from file "/github/busybox/initramfs/x86-busybox-rootfs/test-A.ko" at
.text_addr = 0xffffffffc0058000
(y or n) y
Reading symbols from /github/busybox/initramfs/x86-busybox-rootfs/test-A.ko...done.
(gdb) b call_test_notifier
Breakpoint 1 at 0xffffffffc0058000: file /github/linux-driver/1-notifier/test-A.c, line 11. >>symbol地址 = .text + 偏移,即/proc/kallsyms打印
(gdb) b register_test_notifier
Breakpoint 2 at 0xffffffffc0058020: file /github/linux-driver/1-notifier/test-A.c, line 17.
(gdb) c
Continuing.

此时再插入test-B和test-C即可触发断点

1
2
3
4
5
6
7
8
/lib/modules/5.17.0+ # insmod test-B.ko 
[ 2003.258864] enter notifier_test_B_init pid is 122 >>随后两次register_test_notifier调用使断点2先后命中2次
/lib/modules/5.17.0+ # insmod test-C.ko
[ 2010.537016] enter notifier_test_C_init pid is 123 >>随后call_test_notifier调用触发断点1 hit 1
[ 2010.582501] enter notifier_test_deal_1 >>断点1处继续(c)之后,回调开始执行
[ 2010.586064] action is 0x1 in func is notifier_test_deal_1
[ 2010.586365] enter notifier_test_deal_0
[ 2010.586521] action is 0x1 in func is notifier_test_deal_0

gdb内运行情况

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

Thread 1 hit Breakpoint 2, register_test_notifier (nh=0xffffffffc005f020)
at /github/linux-driver/1-notifier/test-A.c:17
warning: Source file is more recent than executable.
17 return raw_notifier_call_chain(&notifier_test, action, data); >>断点2 hit 1
(gdb) c
Continuing.

Thread 1 hit Breakpoint 2, register_test_notifier (nh=0xffffffffc005f000)
at /github/linux-driver/1-notifier/test-A.c:17
17 return raw_notifier_call_chain(&notifier_test, action, data); >>断点2 hit 2
(gdb) c
Continuing.

Thread 1 hit Breakpoint 1, call_test_notifier (action=1, data=0xffffffffc0063038) >>断点1 hit 1
at /github/linux-driver/1-notifier/test-A.c:11
11 int test1;
(gdb) c
Continuing.
^C
Thread 1 received signal SIGINT, Interrupt.
0xffffffff8fb1127b in ?? ()
(gdb) info b
Num Type Disp Enb Address What
1 breakpoint keep y 0xffffffffc0058000 in call_test_notifier
at /github/linux-driver/1-notifier/test-A.c:11
breakpoint already hit 1 time
2 breakpoint keep y 0xffffffffc0058020 in register_test_notifier
at /github/linux-driver/1-notifier/test-A.c:17
breakpoint already hit 2 times

linux-gdb-helper

安装

内核为了方便gdb调试,增加了gdb自定义接口提高调试效率.优点如下:

  1. 可以减少 add-symbol-file等单个模块symbol接口导入的繁琐指令,
    通过lx-symbols在当前目录索引内核已经插入的动态模块并自动加载对应的symbols
  2. 减少gdb和内核的持续切换调试,避免c指令的重复使用

linux辅助gdb插件位置:scripts/gdb

在使用前需要先编译gdb脚本以生成constants.py,否则调用对应接口时会提示找不到constants.py,比如proc.py:15处的from linux import constants

1
make scripts_gdb

配置~/.gdbinit,增加gdb插件配置

1
2
set debug auto-load on   
set auto-load safe-path /github/linux

之后在linux目录下开始gdb调试

使用

先检查是否生效

1
2
3
4
5
6
7
8
9
10
11
12
13
(gdb) apropos lx
...
lx-dmesg -- Print Linux kernel log buffer
lx-fdtdump -- Output Flattened Device Tree header and dump FDT blob to the filename
lx-genpd-summary -- Print genpd summary
lx-iomem -- Identify the IO memory resource locations defined by the kernel
lx-ioports -- Identify the IO port resource locations defined by the kernel
lx-list-check -- Verify a list consistency
lx-lsmod -- List currently loaded modules
lx-mounts -- Report the VFS mounts of the current process namespace
lx-ps -- Dump Linux tasks
lx-symbols -- (Re-)load symbols of Linux kernel and currently loaded modules
...

内核先插入test-A.ko和test-B.ko,进行notifier注册

1
2
3
4
/lib/modules/5.17.0+ # insmod test-A.ko
[ 28.098009] enter notifier_test_A_init pid is 103
/lib/modules/5.17.0+ # insmod test-B.ko
[ 64.108107] enter notifier_test_B_init pid is 112

gdb在attach到内核后,通过lx-symbols自动插入模块symbol到gdb

1
2
3
4
5
6
7
8
9
10
11
12
//先切换到模块目录
(gdb) cd /github/linux-driver/
Working directory /github/linux-driver.
(gdb) cd 1-notifier/
Working directory /github/linux-driver/1-notifier.
//自动插入模块symbols
(gdb) lx-symbols
loading vmlinux
scanning for modules in /github/linux-driver/1-notifier
loading @0xffffffffc0005000: /github/linux-driver/1-notifier/test-B.ko
loading @0xffffffffc0000000: /github/linux-driver/1-notifier/test_A.ko

在插入test-C(触发通知链)时调试notifier中的链表,加深对链表内存布局的理解,并熟悉用gdb调试链表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
(gdb) b call_test_notifier
Breakpoint 1 at 0xffffffffc0000000: file /github/linux-driver/1-notifier/test-A.c, line 16.
(gdb) b register_test_notifier
Breakpoint 2 at 0xffffffffc0000020: file /github/linux-driver/1-notifier/test-A.c, line 22.
//内核插入test-C,触发断点
(gdb) c
Continuing.
loading @0xffffffffc000a000: /github/linux-driver/1-notifier/test-C.ko
[Switching to Thread 2]

Thread 2 hit Breakpoint 1, call_test_notifier (action=1, data=0xffffffffc000b038)
at /github/linux-driver/1-notifier/test-A.c:16
16 static int call_test_notifier(unsigned long action, void *data) {
(gdb) n
17 return raw_notifier_call_chain(&notifier_test, action, data);

//开始对链表数据的调试,具体数据关系见README
//事件为1, data为no use
(gdb) p action
$12 = 1
(gdb) x/s data
0xffffffffc000b038: "no use"

//开始调试链表
//我们知道,虽然test_nb_1在test_nb_0后面注册,但是优先级高,所以排前面,notifier是单向优先级链表
(gdb) p notifier_test
$14 = {head = 0xffffffffc0007000 <test_nb_1>} >>head指向test_nb_1

(gdb) p &test_nb_1 >>地址和head指向一致
$17 = (struct notifier_block *) 0xffffffffc0007000 <test_nb_1>

(gdb) p test_nb_1
$16 = {notifier_call = 0xffffffffc0005070 <notifier_test_deal_1>, next = 0xffffffffc0007020 <test_nb_0>, priority = 1} >>next指向下一个node,为test_nb_0,指向函数为notifier_test_deal_1

(gdb) info address notifier_test_deal_1 >>函数地址和test_nb_1.notifier_call一致
Symbol "notifier_test_deal_1" is a function at address 0xffffffffc0005070.

(gdb) p test_nb_0
$19 = {notifier_call = 0xffffffffc0005000 <notifier_test_deal_0>, next = 0x0 <fixed_percpu_data>, priority = 0} >>最后一个node的next指向NULL

//成员偏移确定
(gdb) p &test_nb_1
$20 = (struct notifier_block *) 0xffffffffc0007000 <test_nb_1>
(gdb) p &test_nb_1.notifier_call >>第一个成员地址为结构体实例地址
$21 = (notifier_fn_t *) 0xffffffffc0007000 <test_nb_1>
(gdb) p &test_nb_1.next
$22 = (struct notifier_block **) 0xffffffffc0007008 <test_nb_1+8> >>说明指针大小为8个字节
(gdb) p &test_nb_1.priority
$23 = (int *) 0xffffffffc0007010 <test_nb_1+16> >>说明指针大小为8个字节

(gdb) p sizeof(test_nb_1)
$36 = 24
(gdb) p sizeof(int)
$37 = 4

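//int本身为4个字节,而sizeof(test_nb_1)为24,说明priority之后还有4字节尾部填充(结构体按8字节对齐),即test_nb_1占用0xffffffffc0007000---0xffffffffc0007017;而&test_nb_0地址为0xffffffffc0007020,
0xffffffffc0007018---0xffffffffc000701f这段内存为什么空出来?很可能是对齐填充:x86-64上gcc通常会把16字节及以上的全局结构体按16字节对齐,所以下一个对象从0xffffffffc0007020开始(待进一步确认)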
(gdb) p &test_nb_0
$34 = (struct notifier_block *) 0xffffffffc0007020 <test_nb_0>
(gdb) x/x 0xffffffffc0007018
0xffffffffc0007018: 0x00
(gdb) x/x 0xffffffffc000701f
0xffffffffc000701f: 0x00 >>从0xffffffffc0007018---0xffffffffc000701f一直为空
(gdb) x/x 0xffffffffc0007020
0xffffffffc0007020 <test_nb_0>: 0x00