在进行功能调试或者问题定位时,经常需要找一下哪里触发的系统调用,并跟踪一下系统调用过程。

一种方法是使用simpleperf

1
2
3
4
5
simpleperf record -g -o <path_to_perf.data> -- <path_to_exe_with_debuginfo>
python3 <path-to-ndk>/android-ndk-r27c/simpleperf/gecko_profile_generator.py \
      -i <path_to_perf.data> \
      --symfs <path_to_symbols> \
      --kallsyms <path_to_System.map>  | gzip > <path_to_perf.json.gz>

浏览器打开 https://profiler.firefox.com/ ,将上一步生成的 <path_to_perf.json.gz>(例如 add_client.perf.json.gz)拖进去,就可以查看调用树、火焰图、栈图等,进一步分析函数调用关系。

具体可参考另一篇文章《尝试通过一个demo分析binder的执行流程》中的"使用simpleperf抓取通讯双方的函数调用栈"小节。

或者使用基于eBPF的工具比如stackplz 参考 :https://github.com/SeeFlowerX/stackplz

这两种方式在尝试定位某些问题时比较受限,比如kernel启动早期时。因此本文尝试一种更加直接的方式:直接在目标位置打印函数调用栈。

获取用户态和内核态的函数调用栈

首先确保 CONFIG_STACKTRACE、CONFIG_KALLSYMS、CONFIG_USER_STACKTRACE_SUPPORT 这三个内核配置项已经打开。

可以通过 zcat /proc/config.gz | grep -E "STACKTRACE|KALLSYMS" 确认。

在代码中插入以下代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if IS_ENABLED(CONFIG_STACKTRACE)
#include <linux/stacktrace.h>
/* Maximum number of frames captured per trace (size of the on-stack buffer). */
#define STACK_ENTRIES 64

/*
 * dump_all_stacks - log the call stacks of the current task.
 *
 * Prints, via pr_info():
 * 1) the current pid and comm,
 * 2) the kernel call stack (symbolised through %pS),
 * 3) the user-space call stack as raw PC values, when
 *    CONFIG_USER_STACKTRACE_SUPPORT is enabled.
 *
 * The same entries[] buffer is reused for both traces; it lives on the
 * kernel stack (STACK_ENTRIES * sizeof(unsigned long) bytes).
 */
static void dump_all_stacks(void)
{
    int i;
    unsigned int nr;
    unsigned long entries[STACK_ENTRIES];

    /* Basic task identification */
    pr_info("pid=%d comm=%s\n",
            current->pid, current->comm);

    /* -------- kernel call stack -------- */
    memset(entries, 0, sizeof(entries));
    nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

    pr_info("kernel backtrace:\n");
    for (i = 0; i < nr; i++) {
        /* %pS relies on CONFIG_KALLSYMS to print the symbol name */
        pr_info("  [k%02d] %pS\n", i, (void *)entries[i]);
    }

#if IS_ENABLED(CONFIG_USER_STACKTRACE_SUPPORT)
    /* -------- user-space call stack -------- */
    memset(entries, 0, sizeof(entries));

    /*
     * Note: stack_trace_save_user() only works on current; it records
     * the chain of return addresses that was live when the task entered
     * the kernel from user space.
     */
    nr = stack_trace_save_user(entries, ARRAY_SIZE(entries));

    pr_info("user backtrace (raw user PCs):\n");
    for (i = 0; i < nr; i++) {
        /*
         * These are user virtual addresses. Resolve them offline with
         * addr2line (or a similar tool) together with the mapping base
         * addresses from /proc/<pid>/maps to recover function names and
         * line numbers.
         */
        pr_info("  [u%02d] 0x%016lx\n", i, entries[i]);
    }
#else
    pr_info("user stack trace not supported "
            "(CONFIG_USER_STACKTRACE_SUPPORT is disabled)\n");
#endif /* CONFIG_USER_STACKTRACE_SUPPORT */
}
#else
/* No-op stub so call sites still compile when CONFIG_STACKTRACE is off. */
static inline void dump_all_stacks(void) {}
#endif /* CONFIG_STACKTRACE */

在需要跟踪的目标位置插入对 dump_all_stacks() 的调用,重新编译内核并运行,然后抓取内核日志(例如 dmesg)即可看到打印出的调用栈。

v2-8231221c3528c29fe62e04edde02358c_720w.png

解析用户态调用栈

准备了一个python脚本来解析用户态的堆栈,该脚本会计算堆栈中地址相对于符号文件的偏移,并用addr2line尝试解析符号。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
binder_user_stack_resolver.py

用法示例:

1) 先把内核日志导出来:
   adb shell dmesg | grep binder_open > binder.log

2) 把 /proc/<pid>/maps 导出来:
   adb shell cat /proc/1234/maps > maps_1234.txt

3) 在宿主机上执行(假设已提取 system 映像到 /path/to/symbols):

   python3 binder_user_stack_resolver.py \
       --pid 1234 \
       --log binder.log \
       --maps maps_1234.txt \
       --sym-root /path/to/symbols \
       --addr2line aarch64-linux-android-addr2line

说明:
- sym-root 目录下应当能找到类似 /system/bin/surfaceflinger 这样的路径
  (例如解包 system.img 后的根目录)
- addr2line 可以是 aarch64-linux-android-addr2line 或 llvm-addr2line 等
"""

import argparse
import os
import re
import subprocess
import sys
from typing import List, Tuple, Dict, Optional


class MapEntry:
    """A single mapped region, as described by one /proc/<pid>/maps line.

    Attributes are plain values copied from the constructor arguments:
    the half-open virtual address range ``[start, end)``, the ``offset``
    column of the maps line, and the backing file ``path``.
    """

    def __init__(self, start: int, end: int, offset: int, path: str):
        self.start, self.end = start, end
        self.offset, self.path = offset, path

    def contains(self, addr: int) -> bool:
        """Return True when *addr* lies in ``[start, end)`` (end exclusive)."""
        return addr >= self.start and addr < self.end

    def file_offset(self, addr: int) -> int:
        """Translate *addr* into an offset within the backing file.

        The result is the distance of *addr* from the start of the mapping
        plus the mapping's own offset column.
        """
        distance_into_mapping = addr - self.start
        return self.offset + distance_into_mapping


def parse_maps(maps_path: str) -> List[MapEntry]:
    """Parse a saved /proc/<pid>/maps dump into executable, file-backed entries.

    Each line has the form::

        start-end perms offset dev inode pathname
        0000007f8a100000-0000007f8a200000 r-xp 00000000 08:01 123456 /system/bin/surfaceflinger

    Only mappings that are executable ("x" in perms) and backed by a real
    file are kept. Anonymous mappings and bracketed pseudo-paths such as
    ``[vdso]`` or ``[stack]`` are skipped, as are lines that fail to parse.

    Args:
        maps_path: Path of the text file holding the maps dump.

    Returns:
        The matching entries, in file order.

    Raises:
        OSError: If *maps_path* cannot be opened.
    """
    deleted_suffix = " (deleted)"
    entries: List[MapEntry] = []
    with open(maps_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            # Split at most five times: the pathname is the last column and
            # may itself contain spaces, so it must not be split further.
            parts = line.strip().split(None, 5)
            if len(parts) < 6:
                # Blank line, malformed line, or anonymous mapping (no path).
                continue

            addr_range, perms, offset_str = parts[0], parts[1], parts[2]
            path = parts[5]

            # Only executable mappings can contain return addresses.
            if "x" not in perms:
                continue

            try:
                start_str, end_str = addr_range.split("-")
                start = int(start_str, 16)
                end = int(end_str, 16)
                offset = int(offset_str, 16)
            except ValueError:
                continue

            # Pseudo-paths like [vdso], [stack], [anon:...] have no ELF file.
            if path.startswith("["):
                continue

            # The kernel appends " (deleted)" when the backing file was
            # unlinked; drop it so the path can still be found in sym-root.
            if path.endswith(deleted_suffix):
                path = path[: -len(deleted_suffix)]

            entries.append(MapEntry(start, end, offset, path))
    return entries


def find_mapping(maps: List[MapEntry], addr: int) -> Optional[MapEntry]:
    """Return the first entry in *maps* whose range contains *addr*, else None."""
    return next((region for region in maps if region.contains(addr)), None)


def resolve_addr(addr: int, maps: List[MapEntry], sym_root: str, addr2line_bin: str) -> str:
    """Describe one user-space PC as ``address module+offset => function @ location``.

    The address is looked up in *maps*, converted to an offset inside the
    backing file, and handed to addr2line running against the copy of that
    file found under *sym_root*. Every failure is reported inside the
    returned string rather than raised, so one bad frame never aborts the
    rest of the backtrace.

    Args:
        addr: User virtual address taken from the kernel log.
        maps: Executable mappings of the target process.
        sym_root: Directory mirroring the device file system and holding the
            ELF files with symbols (e.g. ``<sym_root>/system/bin/...``).
        addr2line_bin: Name or path of the addr2line executable.

    Returns:
        A single human-readable line for this address.
    """
    m = find_mapping(maps, addr)
    if m is None:
        return f"0x{addr:016x} <no mapping>"

    file_off = m.file_offset(addr)
    prefix = f"0x{addr:016x} {m.path}+0x{file_off:x}"

    # Map a device path such as /system/bin/surfaceflinger onto
    # <sym_root>/system/bin/surfaceflinger; the leading '/' must go first,
    # otherwise os.path.join would discard sym_root.
    elf_path = os.path.join(sym_root, m.path.lstrip("/"))

    if not os.path.exists(elf_path):
        return f"{prefix} (ELF not found: {elf_path})"

    # NOTE(review): addr2line interprets its argument as an ELF virtual
    # address. The file offset used here is assumed to equal that address,
    # which holds only when the segment's p_vaddr matches its p_offset;
    # confirm for the binaries being analysed.
    cmd = [addr2line_bin, "-C", "-f", "-e", elf_path, f"0x{file_off:x}"]
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        return f"{prefix} (addr2line error: {e})"
    except OSError as e:
        # The addr2line binary is missing or not executable; report it per
        # address instead of crashing with a traceback.
        return f"{prefix} (addr2line error: {e})"

    decoded = out.decode("utf-8", errors="ignore").strip().splitlines()
    if len(decoded) >= 2:
        # With -f, addr2line prints the function name, then file:line.
        func, loc = decoded[0], decoded[1]
        return f"{prefix} => {func} @ {loc}"
    if decoded:
        return f"{prefix} => {decoded[0]}"
    return f"{prefix} (no addr2line output)"


def parse_log_for_pid(log_path: str, pid: int) -> List[int]:
    """Collect the user-backtrace addresses logged for one pid.

    The log is scanned line by line. A ``binder_open: pid=<n> comm=`` line
    selects which process the following lines belong to. Once a
    ``binder_open: user backtrace`` line is seen for the requested pid,
    every ``[uNN] 0x<hex>`` line contributes one address. Collection stops
    at the next pid line, at a blank line, or at a non-address line that
    starts with ``binder_open:``.

    Args:
        log_path: Path of the kernel log text file.
        pid: Process id whose user backtrace should be extracted.

    Returns:
        The addresses in log order. Dumps repeated for the same pid are
        concatenated.
    """
    header_pat = re.compile(r"binder_open:\s+pid=(\d+)\s+comm=")
    marker_pat = re.compile(r"binder_open:\s+user backtrace")
    frame_pat = re.compile(r"\[u\d+\]\s+0x([0-9a-fA-F]+)")

    collected: List[int] = []
    owner: Optional[int] = None  # pid named by the most recent header line
    collecting = False           # True while inside the wanted user backtrace

    with open(log_path, "r", encoding="utf-8", errors="ignore") as log:
        for raw in log:
            text = raw.rstrip("\n")

            header = header_pat.search(text)
            if header is not None:
                # A new dump starts; any backtrace in progress is over.
                owner = int(header.group(1))
                collecting = False
            elif owner != pid:
                # Lines belonging to some other process are ignored.
                continue
            elif marker_pat.search(text):
                collecting = True
            elif collecting:
                frame = frame_pat.search(text)
                if frame is not None:
                    collected.append(int(frame.group(1), 16))
                elif not text.strip() or text.startswith("binder_open:"):
                    # A non-[uXX] line of this shape ends the backtrace.
                    collecting = False

    return collected


def main():
    """Command-line entry point: resolve and print one pid's user backtrace.

    Parses the options, loads the executable mappings and the logged
    addresses, then prints a header followed by one resolved line per
    address. Exits with status 1 (message on stderr) when either the
    mappings or the addresses come back empty.
    """
    cli = argparse.ArgumentParser(
        description="Resolve SurfaceFlinger binder_open user stacks"
    )
    cli.add_argument("--pid", required=True, type=int, help="surfaceflinger 的 pid")
    # The remaining mandatory options are plain strings; register them in
    # the same order as before so the generated --help text is unchanged.
    for flag, text in (
        ("--log", "包含 binder_open 打印的 log 文件"),
        ("--maps", "/proc/<pid>/maps 导出的文件路径"),
        ("--sym-root", "带符号 system/so/root 的根目录"),
    ):
        cli.add_argument(flag, required=True, help=text)
    cli.add_argument(
        "--addr2line",
        default="addr2line",
        help="addr2line 可执行文件名(默认: addr2line,可改为 aarch64-linux-android-addr2line)",
    )

    opts = cli.parse_args()

    mappings = parse_maps(opts.maps)
    if not mappings:
        print("ERROR: no executable mappings parsed from maps file", file=sys.stderr)
        sys.exit(1)

    user_pcs = parse_log_for_pid(opts.log, opts.pid)
    if not user_pcs:
        print(f"ERROR: no user backtrace addresses found in log for pid={opts.pid}", file=sys.stderr)
        sys.exit(1)

    print(f"# binder_open user stack for pid={opts.pid}")
    for pc in user_pcs:
        print(resolve_addr(pc, mappings, opts.sym_root, opts.addr2line))


# Run the resolver only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

解析示例

v2-1c6ad1e9a49a4784aacec07f570deefd~resize_1440_q75.png

原理分析

1. 发生系统调用时保存现场

以用户空间执行 open("/dev/binder") 的系统调用为例,指令是: svc #0

CPU 硬件做的事情:

  • 当 EL0 执行 svc #imm:
  • CPU 切换异常级:EL0 → EL1;
  • 用户 PSTATE 保存到 SPSR_EL1
  • 用户 PC(svc 下一条指令地址)保存到 ELR_EL1
  • 使用异常向量表中 EL0 同步异常的入口地址(VBAR_EL1 指向的一张表):
  • 跳转到 el0_sync(或类似名字)的入口
  • 切换栈指针:
  • 使用 SP_EL1 作为栈指针(这时已经是内核栈)
  • SP_EL0 保留的是用户态栈,暂时不会动

注意:此时 x0–x30 里的值仍然是用户态的寄存器值,CPU 没帮你保存到内存,必须靠内核汇编自己存。

1
2
3
4
5
6
arch/arm64/kernel/head.S
__HEAD
    primary_entry
        __primary_switched
            adr_l	x8, vectors			// load VBAR_EL1 with virtual
            msr	        vbar_el1, x8			// vector table address

vectors填入vbar_el1寄存器中,其中vectors是一个全局标记:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
    arch/arm64/kernel/entry.S
    /*
     * Exception vectors.
     */
    	.pushsection ".entry.text", "ax"

    	.align	11
    SYM_CODE_START(vectors)
    	kernel_ventry	0, t, 64, sync		// Synchronous 64-bit EL0
    SYM_CODE_END(vectors)

vectors处通过kernel_ventry定义了多个入口:

1
2
3
4
    .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
    .align 7
    sub	sp, sp, #PT_REGS_SIZE
    b	el\el\ht\()_\regsize\()_\label

先预留PT_REGS_SIZE大小的栈空间,然后跳转到el0t_64_sync处执行

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
    SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
    	kernel_entry \el, \regsize
    	mov	x0, sp
    	bl	el\el\ht\()_\regsize\()_\label\()_handler
    	.if \el == 0
    	b	ret_to_user
    	.else
    	b	ret_to_kernel
    	.endif
    SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
    	.endm

kernel_entry \el, \regsize宏处保存寄存器信息到内核栈中

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
    .macro	kernel_entry, el, regsize = 64
    	stp	x0, x1, [sp, #16 * 0]
    	stp	x2, x3, [sp, #16 * 1]
    	stp	x4, x5, [sp, #16 * 2]
    	stp	x6, x7, [sp, #16 * 3]
    	stp	x8, x9, [sp, #16 * 4]
    	stp	x10, x11, [sp, #16 * 5]
    	stp	x12, x13, [sp, #16 * 6]
    	stp	x14, x15, [sp, #16 * 7]
    	stp	x16, x17, [sp, #16 * 8]
    	stp	x18, x19, [sp, #16 * 9]
    	stp	x20, x21, [sp, #16 * 10]
    	stp	x22, x23, [sp, #16 * 11]
    	stp	x24, x25, [sp, #16 * 12]
    	stp	x26, x27, [sp, #16 * 13]
    	stp	x28, x29, [sp, #16 * 14]

    	.if	\el == 0
    	clear_gp_regs
    	mrs	x21, sp_el0
    	ldr_this_cpu	tsk, __entry_task, x20
    	msr	sp_el0, tsk
    	.else
    	add	x21, sp, #PT_REGS_SIZE
    	get_current_task tsk
    	.endif /* \el == 0 */
    	mrs	x22, elr_el1
    	mrs	x23, spsr_el1
    	stp	lr, x21, [sp, #S_LR]

    	/*
    	 * For exceptions from EL0, create a final frame record.
    	 * For exceptions from EL1, create a synthetic frame record so the
    	 * interrupted code shows up in the backtrace.
    	 */
    	.if \el == 0
    	stp	xzr, xzr, [sp, #S_STACKFRAME] //  pt_regs 区域里写一个终止 frame record(FP、LR = 0),方便 unwinder 知道到头了。
    	.else
    	stp	x29, x22, [sp, #S_STACKFRAME]
    	.endif
    	add	x29, sp, #S_STACKFRAME

之后栈布局如下

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
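低地址
+-------------------------+  <-- task_stack_page(p),即 task->stack(栈的最低地址)
|                         |
|       内核栈空间         |  <-- 内核函数调用使用的栈(向低地址方向增长)
|                         |
|   ... (正常内核栈帧)      |
|                         |
+-------------------------+  <-- task_pt_regs(p)
|     struct pt_regs      |  <-- 从用户态陷入时压入的寄存器快照
+-------------------------+  <-- task_stack_page(p) + THREAD_SIZE(栈顶)
高地址

注:struct pt_regs 紧贴栈顶(最高地址)存放。开启 CONFIG_THREAD_INFO_IN_TASK(arm64 默认开启)时,struct thread_info 位于 task_struct 中,而不在内核栈的低地址端。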

2. stack_trace的实现

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/**
 * stack_trace_save_user - Save a user space stack trace into a storage array
 * @store:	Pointer to storage array
 * @size:	Size of the storage array
 *
 * Return: Number of trace entries stored.
 */
unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
{
	/* Function pointer: the callback handed to the arch-specific walker. */
	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
	/* Cookie passed back to the callback; describes the output buffer. */
	struct stacktrace_cookie c = {
		.store	= store,
		.size	= size,
	};

	/* Trace user stack if not a kernel thread */
	if (current->flags & PF_KTHREAD)
		return 0;

	/* Walk starting from the register snapshot returned by task_pt_regs(). */
	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));

	return c.len;
}
#endif

arch_stack_walk_user是一个架构相关的函数,定义在arch/arm64/kernel/stacktrace.c中:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
/*
 * Walk the user-space call stack described by @regs, reporting each entry
 * through @consume_entry with @cookie as its first argument.
 *
 * The saved user PC (regs->pc) is reported first; the walk stops at once
 * if the callback returns false. For a non-compat task the walk then
 * starts from the saved x29 (regs->regs[29]) and continues while the
 * pointer is non-NULL and 8-byte aligned.
 */
void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
					const struct pt_regs *regs)
{
	if (!consume_entry(cookie, regs->pc))
		return;

	if (!compat_user_mode(regs)) {
		/* AARCH64 mode */
		struct frame_tail __user *tail;

		/* x29 is the frame pointer saved at exception entry. */
		tail = (struct frame_tail __user *)regs->regs[29];
		/*
		 * NOTE(review): unwind_user_frame() is not shown here; it
		 * presumably reports the frame's saved LR and returns the
		 * caller's frame record (NULL to stop) -- confirm.
		 */
		while (tail && !((unsigned long)tail & 0x7))
			tail = unwind_user_frame(tail, cookie, consume_entry);
	} else {
                /* ...... */
	}
}

task_pt_regs(current)

1
2
/*
 * Address of the struct pt_regs of task p: exactly one struct pt_regs below
 * (task_stack_page(p) + THREAD_SIZE), i.e. the last struct pt_regs-sized
 * slot at the high end of the task's stack area.
 */
#define task_pt_regs(p) \
	((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
1
2
3
4
5
6
7
8
9
/*
 * Return the task's stack pointer field (task->stack) as an untyped pointer.
 *
 * When accessing the stack of a non-current task that might exit, use
 * try_get_task_stack() instead.  task_stack_page will return a pointer
 * that could get freed out from under you.
 */
static __always_inline void *task_stack_page(const struct task_struct *task)
{
	return task->stack;
}

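其中 THREAD_SIZE 是内核栈的大小,task_stack_page() 拿到的是内核栈的起始(最低)地址,两者相加得到栈顶地址,再减去一个 struct pt_regs 的大小,就是 pt_regs 所在的位置。因此 task_pt_regs(current) 拿到的就是当前进程从用户态陷入内核时保存在内核栈上的现场(寄存器快照)。arch_stack_walk_user 先记录其中的 pc,再从中取出 x29 寄存器(用户态的 fp),沿着栈帧记录依次读取 fp 指针和 lr 指针,就可以抓到用户态的调用栈。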