本文共 18104 字,大约阅读时间需要 60 分钟。
关于ftrace提供的接口应用如下所示:
static struct ftrace_ops kpatch_ftrace_ops __read_mostly = { .func = kpatch_ftrace_handler, .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,};ftrace_set_filter_ip(&kpatch_ftrace_ops, ip, 0, 0);register_ftrace_function(&kpatch_ftrace_ops);
//ftrace操作结构体struct ftrace_ops { ftrace_func_t func; struct ftrace_ops __rcu *next; unsigned long flags; void *private; ftrace_func_t saved_func;#ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; struct ftrace_ops_hash old_hash; unsigned long trampoline; unsigned long trampoline_size; struct list_head list;#endif}enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_GLOBAL = 1 << 1, FTRACE_OPS_FL_DYNAMIC = 1 << 2, FTRACE_OPS_FL_CONTROL = 1 << 3, FTRACE_OPS_FL_SAVE_REGS = 1 << 4, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, FTRACE_OPS_FL_STUB = 1 << 7, FTRACE_OPS_FL_INITIALIZED = 1 << 8, FTRACE_OPS_FL_IPMODIFY = 1 << 9,};//ftrace_ops哈希链结构struct ftrace_ops_hash { struct ftrace_hash __rcu *notrace_hash; struct ftrace_hash __rcu *filter_hash; struct mutex regex_lock;};//ftrace哈希链结构struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets;//哈希桶 unsigned long count; unsigned long flags; struct rcu_head rcu;};//ftrace设置过滤地址int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset){ //初始ftrace_ops结构体对象 ftrace_ops_init(ops); //将所要更改指令的ip地址与ftrace_ops结构体对象关联 return ftrace_set_addr(ops, ip, remove, reset, 1);}static inline void ftrace_ops_init(struct ftrace_ops *ops){ //打开ftrace动态跟踪设置#ifdef CONFIG_DYNAMIC_FTRACE if (!(ops->flags & FTRACE_OPS_FL_INITALIZED)) { mutex_init(&ops->local_hash.regex_lock); ops->func_hash = &ops->local_hash; ops->flags |= FTRACE_OPS_FL_INITIALIZED; //设置哈希链以及标志位 }#endif}static int ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, int reset, int enable){ //ip和ftrace_ops的关联主要通过结构体中的哈希链结构来实现 return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);}static int ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, unsigned long ip, int remove, int reset, int enable){ struct ftrace_hash **orig_hash; struct ftrace_hash *hash; int ret; if (unlikely(ftrace_disabled)) return -ENODEV; mutex_lock(&ops->func_hash->regex_lock); if (enable) //如果使能 orig_hash = &ops->func_hash->filter_hash; //设置哈希链结构,注意这里为二级指针,‘&’优先级低于’->‘ //不采用一级指针的原因是因为filter_hash为一级指针且值为NULL,因此无法直接赋值给其他一级指针。但是filter_hash指针自身的地址存在,可将其赋值给二级指针,这样*(struct ftrace_hash **)orig_hash便可以取到filter_hash中的地址值,但此时*orig_hash为NULL。 else orig_hash = &ops->func_hash->notrace_hash; if (reset) hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); else hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); //创建哈希链 //#define FTRACE_HASH_DEFAULT_BITS 10 if (!hash) { ret = -ENOMEM; goto out_regex_unlock; } if (buf && !ftrace_match_records(hash, buf, len)) { ret = -EINVAL; goto out_regex_unlock; } if (ip) { //ip存在,执行该分支 ret = ftrace_match_addr(hash, ip, remove); //查看该地址是否存在 if (ret < 0) goto out_regex_unlock; } mutex_lock(&ftrace_lock); ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); //将orig_hash与hash相关联,即ops->func_hash->filter_hash=hash mutex_unlock(&ftrace_lock);out_regex_unlock: mutex_unlock(&ops->func_hash->regex_lock); free_ftrace_hash(hash); return ret;}static struct ftrace_hash *alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash){ struct ftrace_func_entry *entry; struct ftrace_hash *new_hash; int size; int ret; int i; new_hash = alloc_ftrace_hash(size_bits); //创建ftrace_hash结构体对象,并对其进行初始 if (!new_hash) return NULL; if (hash) new_hash->flags = hash->flags; if (ftrace_hash_empty(hash)) //hash为NULL,直接返回创建的ftrace_hash结构体对象 return new_hash; size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ret = add_hash_entry(new_hash, entry->ip); if (ret < 0) goto free_hash; } } FTRACE_WARN_ON(new_hash->count != hash->count); return new_hash;free_hash: free_ftrace_hash(new_hash); return NULL;}static struct ftrace_hash *alloc_ftrace_hash(int size_bits){ struct ftrace_hash *hash; int size; hash = kzalloc(sizeof(*hash), GFP_KERNEL); //创建ftrace_hash结构体对象 if (!hash) return NULL; size = 1 << size_bits; hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); //初始ftrace_hash结构体中的hlist_head结构体对象,即分配size个哈希桶结构体对象 if (!hash->buckets) { kfree(hash); return NULL; } hash->size_bits = size_bits; return hash;}static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash){ return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD));}static int ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove){ struct ftrace_func_entry *entry; if (!ftrace_location(ip)) //检查ip地址是否真实存在 return -EINVAL; if (remove) { entry = ftrace_lookup_ip(hash, ip); if (!entry) return -ENOENT; free_hash_entry(hash, entry); return 0; } return add_hash_entry(hash, ip); //如果ip存在,则将其插入哈希链中 }/*unsigned long ftrace_location(unsigned long ip){ return ftrace_location_range(ip, ip);}struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; int index; int size;};//ftrace动态跟踪结构体struct dyn_ftrace { unsigned long ip; unsigned long flags; struct dyn_arch_ftrace arch;};//该结构体与架构相关,MIPS/X86等架构中为空struct dyn_arch_ftrace {};unsigned long ftrace_location_range(unsigned long start, unsigned long end){ struct dyn_ftrace *rec; rec = lookup_rec(start, end); if (rec) return rec->ip; return 0;}//查找recstatic struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end){ struct ftrace_page *pg; struct dyn_ftrace *rec = NULL; struct dyn_ftrace key; key.ip = start; key.flags = end; for (pg = ftrace_pages_start; pg; pg = pg->next) { //遍历ftrace中所有的ftrace_page if (end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) continue; rec = bsearch(&key, pg->records, pg->index, sizeof(struct dyn_ftrace), ftrace_cmp_recs); if (rec) break; } return rec;}void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp){ return __inline_bsearch(key, base, num, size, cmp);}//二进制搜索static __always_inline void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp){ const char *pivot; //地址 int result; while (num > 0) { pivot = base + (num >> 1) * size; //获取dyn_ftrace结构体对象 result = cmp(key, pivot); //该函数为回调函数 if (result == 0) return (void *)pivot; if (result > 0) { base = pivot + size; num--; } num >>= 1; } return NULL;}//dyn_ftrace结构体比较static int ftrace_cmp_recs(const void *a, const void *b){ const struct dyn_ftrace *key = a; const struct dyn_ftrace *rec = b; if (key->flags < rec->ip) return -1; if (key->ip >= rec->ip + MCOUNT_INSN_SIZE) //MCOUNT_INSN_SIZE为mcount函数所对应的汇编指令以二进制形式所占据的字节大小 return 1; return 0;}//根据代码,可知所要查找的ip地址与dyn_ftrace所指向的地址关系为:// ip | dyn_ftrace,该情况为错误// ip|dyn_ftrace,该情况为两者为同一地址// dyn_ftrace | dyn_ftrace|ip,该情况下,ip在下一个dyn_ftrace位置处 */static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip){ struct ftrace_func_entry *entry; /* struct ftrace_func_entry { struct hlist_node hlist; unsigned long ip; unsigned long direct; }; */ entry = kmalloc(sizeof(*entry), GFP_KERNEL); //创建ftrace_func_entry结构体对象 if (!entry) return -ENOMEM; entry->ip = ip; __add_hash_entry(hash, entry); //添加到哈希链中 return 0;} static void __add_hash_entry(struct ftrace_hash *hash, struct ftrace_func_entry *entry){ struct hlist_head *hhd; unsigned long key; key = ftrace_hash_key(hash, entry->ip); //计算哈希索引 hhd = &hash->buckets[key]; hlist_add_head(&entry->hlist, hhd); hash->count++;}/*static int ftrace_match_records(struct ftrace_hash *hash, char *buff, int len){ return match_records(hash, buff, len, NULL);}static int match_records(struct ftrace_hash *hash, char *func, int len, char *mod){ struct ftrace_page *pg; struct dyn_ftrace *rec; struct ftrace_glob func_g = { .type = MATCH_FULL }; struct ftrace_glob mod_g = { .type = MATCH_FULL }; struct ftrace_glob *mod_match = (mod) ? &mod_g : NULL; //struct ftrace_glob { // char *search; // unsigned len; // int type; //}; int exclude_mod = 0; int found = 0; int ret; int clear_filter = 0; if (func) { func_g.type = filter_parse_regex(func, len, &func_g.search, &clear_filter); func_g.len = strlen(func_g.search); } if (mod) { mod_g.type = filter_parse_regex(mod, strlen(mod), &mod_g_search, &exclude_mod); mod_g.len = strlen(mod_g.search) } mutex_lock(&ftrace_lock); if (unlikely(ftrace_disabled)) goto out_unlock; if (func_g.type == MATCH_INDEX) { found = add_rec_by_index(hash, &func_g, clear_filter); goto out_unlock; } do_for_each_ftrace_rec(pg, rec) { if (rec->flags & FTRACE_FL_DISABLED) continue; if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); if (ret < 0) { found = ret; goto out_unlock; } found = 1; } } while_for_each_ftrace_rec();out_unlock: mutex_unlock(&ftrace_lock); return found;}*/static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, struct ftrace_hash **orig_hash, struct ftrace_hash *hash, int enable){ struct ftrace_ops_hash old_hash_ops; struct ftrace_hash *old_hash; int ret; old_hash = *orig_hash; old_hash_ops.filter_hash = ops->func_hash->filter_hash; old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; ret = ftrace_hash_move(ops, enable, orig_hash, hash); if (!ret) { ftrae_ops_update_code(ops, &old_hash_ops); free_ftrace_hash_rcu(old_hash); } return ret;}/*static int ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src){ struct ftrace_hash *new_hash; int ret; if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) return -EINVAL; new_hash = __ftrace_hash_move(src); if (!new_hash) return -ENOMEM; if (enable) { ret = ftrace_hash_ipmodify_update(ops, new_hash); if (ret < 0) { free_ftrace_hash(new_hash); return ret; } } ftrace_hash_rec_disable_modify(ops, enable); rcu_assign_pointer(*dst, new_hash); ftrace_hash_rec_enable_modify(ops, enable);}static struct ftrace_hash *__ftrace_hash_move(struct ftrace_hash *src){ struct ftrace_func_entry *entry; struct hlist_node *tn; struct hlist_head *hhd; struct ftrace_hash *new_hash; int size = src->count; int bits = 0; int i; if (ftrace_hash_empty(src)) return EMPTY_HASH; for (size /= 2; size; size >>= 1) bits++; if (bits > FTRACE_HASH_MAX_BITS) bits =FTRACE_HASH_MAX_BITS; new_hash = alloc_ftrace_hash(bits); if (!new_hash) return NULL; new_hash->flags = src->flags; size = 1 << src->size_bits; for (i = 0; i < size; i++) { hhd = &src->buckets[i]; hlist_for_each_entry_safe(entry, tn, hhd, hlist) { remove_hash_entry(src, entry); __add_hash_entry(new_hash, entry); } } return new_hash;}*///注册ftrace_ops结构体int register_ftrace_function(struct ftrace_ops *ops){ int ret = -1; ftrace_ops_init(ops); //之前已置位,不再执行 mutex_lock(&ftrace_lock); ret = ftrace_startup(ops, 0); //启动ftrace mutex_unlock(&ftrace_lock); return ret;}int ftrace_startup(struct ftrace_ops *ops, int command){ int ret; if (unlikely(ftrace_disabled)) return -ENODEV; ret = __register_ftrace_function(ops); //注册ftrace_ops结构体对象 if (ret) return ret; ftrace_start_up++; ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; ret = ftrace_hash_ipmodify_enable(ops); if (ret < 0) { __unregister_ftrace_function(ops); ftrace_start_up--; ops->flags &= ~FTRACE_OPS_FL_ENABLED; if (ops->flags & FTRACE_OPS_FL_DYNAMIC) ftrace_trampoline_free(ops); return ret; } if (ftrace_hash_rec_enable(ops, 1)) command |= FTRACE_UPDATE_CALLS; ftrace_startup_enable(command); //使能ftrace ops->flags &= ~FTRACE_OPS_FL_ADDING; return 0;}static int __register_ftrace_function(struct ftrace_ops *ops){ if (ops->flags & FTRACE_OPS_FL_DELETED) return -EINVAL; if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EBUSY;#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) return -EINVAL; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ops->flags |= FTRACE_OPS_FL_SAVE_REGS;#endif if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; add_ftrace_ops(&ftrace_ops_list, ops); // ftrace_ops添加到全局链表ftrace_ops_list中 ops->saved_func = ops->func; if (ftrace_pids_enabled(ops)) ops->func = ftrace_pid_func; ftrace_update_trampoline(ops); if (ftrace_enabled) update_ftrace_function(); return 0;}static void ftrace_startup_enable(int command){ if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; command |= FTRACE_UPDATE_TRACE_FUNC; } if (!command || !ftrace_enabled) return; ftrace_run_update_code(command);}static void ftrace_run_update_code(int command){ int ret; ret = ftrace_arch_code_modify_prepare(); FTRACE_WARN_ON(ret); if (ret) return; arch_ftrace_update_code(command); ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret);}void __weak arch_ftrace_update_code(int command){ ftrace_modify_all_code(command);}void ftrace_modify_all_code(int command){ int update = command & FTRACE_UPDATE_TRACE_FUNC; int err = 0; if (update) { err = ftrace_update_ftrace_func(ftrace_ops_list_func); //将ftrace_call()函数入口的第一条的指令替换为jal ftrace_ops_list_func //注意:是ftrace_call()函数,该函数被ftrace_caller()调用 if (FTRACE_WARN_ON(err)) return; } if (command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); //将调用ftrace_caller()函数的指令由nop指令改为jal ftrace_caller+8 else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); if (update && ftrace_trace_function != ftrace_ops_list_func) { function_trace_op = set_function_trace_op; smp_wmb(); if (!irqs_disabled()) smp_call_function(ftrace_sync_ipi, NULL, 1); err = ftrace_update_ftrace_func(ftrace_trace_function); if (FTRACE_WARN_ON(err)) return; } if (command & FTRACE_START_FUNC_RET) err = ftrace_enable_ftrace_graph_caller(); else if (command & FTRACE_STOP_FUNC_RET) err = ftrace_disable_ftrace_graph_caller(); FTRACE_WARN_ON(err);}#define JAL 0x0c000000#define ADDR_MASK 0x03ffffff#define INSN_JAL(addr) ((unsigned long)(JAL | (((addr) >> 2)& ADDR_MASK)))#define FTRACE_CALL_IP ((unsigned long)(&ftrace_call))int ftrace_update_ftrace_func(ftrace_func_t func){ unsigned int new; new = INSN_JAL((unsigned long)func); //假设:&func = 0x802e7fc8 //new = 0x0c000000 | ((0x802e7fc8 >> 2) & 0x03ffffff) //new = 0x0c000000 | (0x400b9ff2 & 0x03ffffff) //new = 0x0c000000 | 0x000b9ff2 //new = 0x0c0b9ff2 return ftrace_modify_code(FTRACE_CALL_IP, new); }void __weak ftrace_replace_code(int enable){ struct dyn_ftrace *rec; struct ftrace_page *pg; int failed; if (unlikely(ftrace_disbaled)) return; do_for_each_ftrace_rec(pg, rec) { if (rec->flag & FTRACE_FL_DISABLED) continue; failed = __ftrace_replace_code(rec, enable); if (failed) { ftrace_bug(failed, rec); return; } } while_for_each_ftrace_rec();}static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable){ unsigned long ftrace_old_addr; unsigned long ftrace_addr; int ret; ftrace_addr = ftrace_get_addr_new(rec); ftrae_old_addr = ftrace_get_addr_curr(rec); ret = ftrace_update_record(rec, enable); ftrace_bug_type = FTRACE_BUG_UNKNOWN; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: ftrace_bug_type = FTRACE_BUG_CALL; return ftrace_make_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: ftrace_bug_type = FTRACE_BUG_NOP; return ftrace_make_nop(NULL, rec, ftrace_old_addr); case FTRACE_UPDATE_MODIFY_CALL: ftrace_bug_type = FTRACE_BUG_UPDATE; return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } return -1;}int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr){ unsigned int new; unsigned long ip = rec->ip; //判断ip地址的位置,来选则替换的指令,这是因为如果调用_mcount函数的地址与_mcount函数的位置超过256M,则指令不同 new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];#ifdef CONFIG_64BIT return ftrace_modify_code(ip, new);#else return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ? INSN_NOP : insn_la_mcount[1]);#endif}//通过对ftrace的分析,可知启动ftrace后,函数会执行下边的接口来执行trace所指向的函数#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip){ __ftrace_ops_list_func(ip, parent_ip, NULL, NULL);}static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs){ struct ftrace_ops *op; int bit; bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return 0; preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { if ((!(op->flags & FTRACE_OPS_FL_RCU) || (rcu_is_watching())) && ftrace_ops_test(op, ip, regs)) { if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p, %pS\n", op, op); goto out; } op->func(ip, parent_ip, op, regs); } } while_for_each_ftrace_op(op);out: preempt_enable_notrace(); trace_clear_recursion(bit);}
通过对上述ftrace执行流程的分析,可知,ftrace执行的整体过程如下:
gcc -pgkernel_function() { | jal ftracel_caller() { | b ftrace_stub | mcount_save_regs
ftrace_initkernel_function() { | nop() { | nop | mcount_save_regs
ftrace (ftrace_set_filter_ip,register_ftrace_function)kernel_function() { | jal ftracel_caller() + 8 { | mcount_save_regs (address:ftrace_caller+8) | ftrace_call() | jal ftrace_ops_list_func
需要注意的是,模块编译的结果与内核文本段编译的结果存在差别,即:
lui v0, x0daddiu v0, v0, x0move AT, rajalr v0nop
当注册内核模块时,会出现如下的调用顺序,
load_module | ftrace_module_init | ftrace_process_locs | ftrace_update_code | ftrace_code_disable | ftrace_make_nop
此时,指令会有所修改,变为:
b 4daddiu v0, v0, x0move AT, rajalr v0nop
这些便是内核模块的区别。
以上是基于MIPS架构的分析,其他架构可能存在差异。
转载地址:http://vpxii.baihongyu.com/