# Trying to get variable dependencies with libclang

本文記錄如何用 libclang(clang 的前端)解析專案中所有的 .c / .cpp 檔並展開 AST。目前的進度只能取得 caller 與 callee 的關係,以及函式的參數與函式內變數(function variable)的資訊。

假設你已經安裝 LLVM,libclang 的函式庫應該會在以下路徑:

```bash=
# List the libclang shared libraries installed with LLVM. The pattern is
# quoted so the shell passes it to find instead of expanding it first.
find /usr/lib/x86_64-linux-gnu -name 'libclang*'
# /usr/lib/x86_64-linux-gnu/libclang-10.so.1
# /usr/lib/x86_64-linux-gnu/libclang-11.so.1
# /usr/lib/x86_64-linux-gnu/libclang-cpp.so.10
# /usr/lib/x86_64-linux-gnu/libclang-cpp.so.11

# Create the unversioned libclang.so name next to the versioned libraries.
# The link target must be one of the files listed above (there is no
# libclang.so.1 in that listing); libclang-11.so.1 is used here.
# Absolute paths are used so the link does not depend on the current directory.
sudo ln -s /usr/lib/x86_64-linux-gnu/libclang-11.so.1 /usr/lib/x86_64-linux-gnu/libclang.so

# Listing the directory again should now show the new link.
find /usr/lib/x86_64-linux-gnu -name 'libclang*'
# /usr/lib/x86_64-linux-gnu/libclang-10.so.1
# /usr/lib/x86_64-linux-gnu/libclang-11.so.1
# /usr/lib/x86_64-linux-gnu/libclang-cpp.so.10
# /usr/lib/x86_64-linux-gnu/libclang-cpp.so.11
# /usr/lib/x86_64-linux-gnu/libclang.so  <=== this is the file to create; once it exists libclang should be usable
```

本來想透過 clang 前端找出 variable 之間的依賴關係,嘗試了蠻多種方式,後來發現有 `cindex.CompilationDatabase.fromDirectory` 可以用。這邊先用 compiledb 從 Makefile 產生 compilation database(`compile_commands.json`),再交給 libclang 讀取,建立帶有依賴關係的 AST。

不過結果還是不太理想;但至少可以對該專案裡所有的 .c / .cpp 建立 call graph,再加上一些額外資訊,應該可以更清楚地看出 variable 的引用關係。照理說這件事交給 compiler 來處理應該會比較正確,哈哈。

下面的範例程式讀取的 compilation database 位於:

/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/compile_commands.json

```python=
# Build a caller -> callee map for every .c / .cpp file listed in a
# compile_commands.json file and print the resulting call stacks.

# Import necessary libraries
import json
import os

import clang.cindex as cindex

# Root of the analysed project; compile_commands.json lives directly in it.
project_dir = '/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/'

# Specify the path to the compile_commands.json file
compile_commands_file = os.path.join(project_dir, 'compile_commands.json')

# Cursor kinds that open a new "current function" scope. Besides plain C
# functions this also covers C++ member functions, so calls made inside a
# method are not attributed to whichever free function was seen last.
FUNCTION_KINDS = (
    cindex.CursorKind.FUNCTION_DECL,
    cindex.CursorKind.CXX_METHOD,
    cindex.CursorKind.CONSTRUCTOR,
    cindex.CursorKind.DESTRUCTOR,
    cindex.CursorKind.FUNCTION_TEMPLATE,
)

# Options forwarded from the compilation database to libclang. Both the
# joined form ("-Iinc") and the split form ("-I", "inc") are recognised.
FORWARDED_FLAGS = ('-I', '-D', '-isystem')

# Initialize libclang
index = cindex.Index.create()

# Read the compile_commands.json file
with open(compile_commands_file, 'r', encoding='utf-8') as f:
    compile_commands_data = json.load(f)

# Create a set to store all .c and .cpp files (a set removes duplicates
# when the same file is compiled more than once).
source_files = set()

# Iterate through each compilation command
for command_data in compile_commands_data:
    # Get the source file path from the compilation command
    source_file = command_data['file']
    # Check if the file extension is .c or .cpp
    if source_file.endswith(('.c', '.cpp')):
        # "file" is relative to the entry's "directory"; os.path.join also
        # leaves an already-absolute "file" untouched.
        base_dir = command_data.get('directory', project_dir)
        source_files.add(os.path.normpath(os.path.join(base_dir, source_file)))

# Create a dictionary to store function call relationships:
# caller display name -> set of callee display names.
function_calls = {}


def collect_parse_args(commands):
    """Return the include/define options found in *commands*.

    *commands* is the result of ``CompilationDatabase.getCompileCommands``;
    it may be ``None`` when the file is not in the database, in which case
    an empty list is returned.

    NOTE: relative include paths are passed through unchanged, so libclang
    resolves them against the current working directory.
    """
    parse_args = []
    for command in commands or ():
        take_next = False
        for argument in (str(raw) for raw in command.arguments):
            if take_next:
                # Value belonging to a split-form flag such as "-I inc".
                parse_args.append(argument)
                take_next = False
            elif argument in FORWARDED_FLAGS:
                parse_args.append(argument)
                take_next = True
            elif argument.startswith(FORWARDED_FLAGS):
                parse_args.append(argument)
    return parse_args


def visit(cursor, enclosing_function=None):
    """Walk the AST below *cursor* and record every call expression.

    *enclosing_function* is the display name of the function whose subtree
    is being walked, or ``None`` outside any function. Passing it down the
    recursion (instead of keeping it in a global) guarantees a call is only
    attributed to the function that actually contains it.
    """
    if cursor.kind in FUNCTION_KINDS:
        # When a function declaration is found
        enclosing_function = cursor.displayname
        function_calls.setdefault(enclosing_function, set())
    elif (cursor.kind == cindex.CursorKind.CALL_EXPR
          and enclosing_function is not None):
        # Record the function called by the enclosing function
        callee = cursor.referenced
        if callee:
            function_calls[enclosing_function].add(callee.displayname)

    for child in cursor.get_children():
        visit(child, enclosing_function)


# The database is the same for every file, so load it once.
compile_commands = cindex.CompilationDatabase.fromDirectory(project_dir)

# Iterate through the paths of all .c and .cpp files (sorted so the order
# of the report does not change from run to run).
for source_file in sorted(source_files):
    print(f"Analyzing {source_file}...")

    # Get the compilation commands and keep the options libclang needs
    commands = compile_commands.getCompileCommands(source_file)
    file_args = collect_parse_args(commands)

    # Parse the source code file. index.parse() raises on failure instead
    # of returning a falsy value, so the error has to be caught.
    try:
        translation_unit = index.parse(
            source_file,
            args=file_args,
            options=cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD,
        )
    except cindex.TranslationUnitLoadError:
        print(f"Failed to parse translation unit for {source_file}")
        continue

    # Traverse the AST starting at its root node
    visit(translation_unit.cursor)

# Functions already printed for the call stack currently being reported.
function_calls_record = []
# One function_calls_record list per top-level caller.
total_call_record = []


# Function to print function call stacks
def print_function_calls(caller, depth=0):
    """Print *caller*, its callees and, recursively, what those callees call.

    Functions already present in ``function_calls_record`` are skipped, which
    also stops the recursion on cyclic call graphs.
    """
    if caller in function_calls_record:
        return
    function_calls_record.append(caller)

    indent = " " * depth
    print(f"{indent}Caller: {caller}")

    # sorted(): sets of strings iterate in a different order on every run.
    for callee in sorted(function_calls.get(caller, ())):
        if callee in function_calls_record:
            # Skip only this callee; the remaining ones still get printed.
            continue
        function_calls_record.append(callee)

        indent = " " * (depth + 1)
        print(f"{indent}Callee: {callee}")

        for x in sorted(function_calls.get(callee, ())):
            print_function_calls(x, depth + 2)


# Traverse all functions and record function call stacks
for caller in function_calls:
    function_calls_record = []
    print_function_calls(caller)
    total_call_record.append(function_calls_record)

# Optional example, left disabled:
# possible_callers = []  # Use a list to store possible callers

# Function to find possible callers of a target function
# def find_possible_callers(target_function):
#     global possible_callers
#     for call_record in total_call_record:
#         if target_function in call_record:
#             if target_function not in possible_callers:
#                 possible_callers.append(target_function)
#             for x in call_record[call_record.index(target_function):]:
#                 if x != target_function and x not in possible_callers:
#                     possible_callers.append(x)
#                     find_possible_callers(x)
#     return possible_callers

# # Example: Find possible callers of the "matrix_test" function
# target_function = 'matrix_test(ee_u32, MATRES *, MATDAT *, MATDAT *, MATDAT)'
# possible_callers = find_possible_callers(target_function)
# if possible_callers:
#     print(f"Possible callers of {target_function}:")
#     for caller in possible_callers:
#         print(f"  {caller}")
# else:
#     print(f"No possible callers found for {target_function}")
```

```output=
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/utils.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/isatty.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/fstat.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/read.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/write.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/reset.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/close.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/_exit.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/write_hex.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/uart.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/platform.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/lseek.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/platform/stubs/sbrk.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_util.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_state.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_portme.c...
Analyzing /home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_main.c...

Caller: remove(const char *)
Caller: rename(const char *, const char *) Caller: reset_handler() Callee: c_startup() Caller: __builtin_memcpy(void *, const void *, unsigned long) Caller: __builtin_memset(void *, int, unsigned long) Callee: system_init() Caller: uart_init(unsigned int) Caller: _close(int) Callee: _stub(int) Caller: exit(int) Caller: uart_init(unsigned int) Caller: uart_putc(int) Caller: uart_puts(const char *) Callee: uart_putc(int) Caller: outbyte(int) Callee: uart_putc(int) Caller: reset_vector() Caller: memory_init() Callee: __builtin_constant_p() Caller: portable_init(core_portable *, int *, char **) Caller: portable_fini(core_portable *) Caller: iterate(void *) Callee: core_bench_list(core_results *, ee_s16) Caller: core_list_find(list_head *, list_data *) Caller: core_list_remove(list_head *) Caller: core_list_reverse(list_head *) Caller: crc16(ee_s16, ee_u16) Callee: crcu16(ee_u16, ee_u16) Caller: crcu8(ee_u8, ee_u16) Caller: core_list_mergesort(list_head *, list_cmp, core_results *) Callee: cmp Caller: core_list_undo_remove(list_head *, list_head *) Caller: start_time() Callee: printf(const char *restrict, ...) Callee: get_timer_value() Caller: stop_time() Callee: get_timer_value() Caller: get_time() Caller: time_in_secs(uint64_t) Caller: crcu8(ee_u8, ee_u16) Caller: crc16(ee_s16, ee_u16) Callee: crcu16(ee_u16, ee_u16) Caller: crcu8(ee_u8, ee_u16) Caller: crcu16(ee_u16, ee_u16) Callee: crcu8(ee_u8, ee_u16) Caller: crcu32(ee_u32, ee_u16) Callee: crc16(ee_s16, ee_u16) Caller: crcu16(ee_u16, ee_u16) Callee: crcu8(ee_u8, ee_u16) Caller: check_data_types() Callee: printf(const char *restrict, ...) 
Caller: portable_malloc(ee_size_t) Caller: portable_free(void *) Caller: parseval(char *) Caller: core_list_init(ee_u32, list_head *, ee_s16) Callee: core_list_insert_new(list_head *, list_data *, list_head **, list_data **, list_head *, list_data *) Caller: copy_info(list_data *, list_data *) Caller: core_bench_list(core_results *, ee_s16) Callee: core_list_find(list_head *, list_data *) Callee: core_list_remove(list_head *) Callee: core_list_reverse(list_head *) Callee: crc16(ee_s16, ee_u16) Caller: crcu16(ee_u16, ee_u16) Callee: crcu8(ee_u8, ee_u16) Callee: core_list_mergesort(list_head *, list_cmp, core_results *) Caller: cmp Callee: core_list_undo_remove(list_head *, list_head *) Caller: core_init_state(ee_u32, ee_s16, ee_u8 *) Caller: core_bench_state(ee_u32, ee_u8 *, ee_s16, ee_s16, ee_s16, ee_u16) Callee: crcu32(ee_u32, ee_u16) Caller: crc16(ee_s16, ee_u16) Callee: crcu16(ee_u16, ee_u16) Caller: crcu8(ee_u8, ee_u16) Callee: core_state_transition(ee_u8 **, ee_u32 *) Caller: ee_isdigit(ee_u8) Caller: core_init_matrix(ee_u32, void *, ee_s32, mat_params *) Caller: core_bench_matrix(mat_params *, ee_s16, ee_u16) Callee: matrix_test(ee_u32, MATRES *, MATDAT *, MATDAT *, MATDAT) Caller: matrix_sum(ee_u32, MATRES *, MATDAT) Caller: matrix_mul_const(ee_u32, MATRES *, MATDAT *, MATDAT) Caller: matrix_mul_matrix_bitextract(ee_u32, MATRES *, MATDAT *, MATDAT *) Caller: matrix_mul_matrix(ee_u32, MATRES *, MATDAT *, MATDAT *) Caller: crc16(ee_s16, ee_u16) Callee: crcu16(ee_u16, ee_u16) Caller: crcu8(ee_u8, ee_u16) Caller: matrix_add_const(ee_u32, MATDAT *, MATDAT) Caller: matrix_mul_vect(ee_u32, MATRES *, MATDAT *, MATDAT *) Caller: core_list_find(list_head *, list_data *) Caller: core_list_reverse(list_head *) Caller: core_list_remove(list_head *) Caller: core_list_undo_remove(list_head *, list_head *) Caller: core_list_insert_new(list_head *, list_data *, list_head **, list_data **, list_head *, list_data *) Callee: copy_info(list_data *, list_data *) Caller: 
core_list_mergesort(list_head *, list_cmp, core_results *) Callee: cmp Caller: calc_func(ee_s16 *, core_results *) Callee: core_bench_state(ee_u32, ee_u8 *, ee_s16, ee_s16, ee_s16, ee_u16) Caller: crcu32(ee_u32, ee_u16) Callee: crc16(ee_s16, ee_u16) Caller: crcu16(ee_u16, ee_u16) Callee: crcu8(ee_u8, ee_u16) Caller: core_state_transition(ee_u8 **, ee_u32 *) Callee: ee_isdigit(ee_u8) Caller: cmp_complex(list_data *, list_data *, core_results *) Callee: calc_func(ee_s16 *, core_results *) Caller: core_bench_state(ee_u32, ee_u8 *, ee_s16, ee_s16, ee_s16, ee_u16) Callee: crcu32(ee_u32, ee_u16) Caller: crc16(ee_s16, ee_u16) Callee: crcu16(ee_u16, ee_u16) Caller: crcu8(ee_u8, ee_u16) Callee: core_state_transition(ee_u8 **, ee_u32 *) Caller: ee_isdigit(ee_u8) Caller: core_bench_matrix(mat_params *, ee_s16, ee_u16) Callee: matrix_test(ee_u32, MATRES *, MATDAT *, MATDAT *, MATDAT) Caller: matrix_sum(ee_u32, MATRES *, MATDAT) Caller: matrix_mul_const(ee_u32, MATRES *, MATDAT *, MATDAT) Caller: matrix_mul_matrix_bitextract(ee_u32, MATRES *, MATDAT *, MATDAT *) Caller: matrix_mul_matrix(ee_u32, MATRES *, MATDAT *, MATDAT *) Caller: matrix_add_const(ee_u32, MATDAT *, MATDAT) Caller: matrix_mul_vect(ee_u32, MATRES *, MATDAT *, MATDAT *) Caller: cmp_idx(list_data *, list_data *, core_results *) Caller: copy_info(list_data *, list_data *) Caller: _lseek(int, off_t, int) Callee: isatty(int) Callee: _stub(int) Caller: _sbrk(int) Caller: get_seed_32(int) Caller: core_state_transition(ee_u8 **, ee_u32 *) Callee: ee_isdigit(ee_u8) Caller: ee_isdigit(ee_u8) Caller: matrix_test(ee_u32, MATRES *, MATDAT *, MATDAT *, MATDAT) Callee: matrix_sum(ee_u32, MATRES *, MATDAT) Callee: matrix_mul_const(ee_u32, MATRES *, MATDAT *, MATDAT) Callee: matrix_mul_matrix_bitextract(ee_u32, MATRES *, MATDAT *, MATDAT *) Callee: matrix_mul_matrix(ee_u32, MATRES *, MATDAT *, MATDAT *) Callee: crc16(ee_s16, ee_u16) Caller: crcu16(ee_u16, ee_u16) Callee: crcu8(ee_u8, ee_u16) Callee: matrix_add_const(ee_u32, 
files source_files = set() # Iterate through each compilation command for command_data in compile_commands_data: # Get the source file path from the compilation command source_file = command_data['file'] # Check if the file extension is .c or .cpp if source_file.endswith('.c') or source_file.endswith('.cpp'): source_files.add("/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/" + source_file) # Create a dictionary to store function call relationships function_calls = {} function_variable = {} function_param = {} # Iterate through the paths of all .c and .cpp files for source_file in source_files: print(f"Analyzing {source_file}...") # Create a CompilationDatabase for the specified directory compile_commands = cindex.CompilationDatabase.fromDirectory('/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/') # Get the compilation commands commands = compile_commands.getCompileCommands(source_file) # Create a list to store compilation options file_args = [] for command in commands: for argument in command.arguments: if str(argument).find("-I") >= 0: file_args.append(argument) # Parse the source code file translationUnit = index.parse( source_file, args=file_args, options=cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD ) if not translationUnit: print(f"Failed to parse translation unit for {source_file}") else: # Get the root node of the abstract syntax tree rootCursor = translationUnit.cursor # Recursive function to traverse each node in the abstract syntax tree def visit(cursor, depth=0): global function_name if cursor.kind == cindex.CursorKind.FUNCTION_DECL: # When a function declaration is found function_name = cursor.displayname if function_name not in function_calls: function_calls[function_name] = set() if function_name not in function_variable: function_variable[function_name] = set() if function_name not in function_param: function_param[function_name] = set() # Get function parameter information for param in cursor.get_children(): if param.kind == 
cindex.CursorKind.PARM_DECL: param_name = param.displayname param_type = param.type.spelling param_info = f" Parameter Name: {param_name} | Parameter Type: {param_type}" function_param[function_name].add(param_info) # Print other functions called by this function for child in cursor.get_children(): if child.kind == cindex.CursorKind.CALL_EXPR: callee = child.referenced if callee: callee_name = callee.displayname function_calls[function_name].add(callee_name) if child.kind: # Get variable declarations referenced by DECL_REF_EXPR var_decl = child.get_definition() if var_decl and var_decl.kind == cindex.CursorKind.VAR_DECL: output = " Variable Name:"+ str(child.displayname) + "|Variable Location:"+ str(var_decl.location.file.name)+ ":"+ str(var_decl.location.line)+ ":"+ str(var_decl.location.column) function_variable[function_name].add(output) for child in cursor.get_children(): visit(child, depth + 1) # Call the traversal function visit(rootCursor) # Record of function calls function_calls_record = [] def print_function_calls(caller, depth=1): if caller in function_calls_record: return else: function_calls_record.append(caller) for callee in function_calls[caller]: if callee in function_calls_record: return else: function_calls_record.append(callee) indent = " " * depth print(f"{indent}Function Declaration: {caller}") print(f"{indent} Callee: {callee}") print_function_calls(callee, depth + 1) total_call_record = [] def print_function_param(function_name, depth): if function_name in function_param: get_function_param= function_param[function_name] indent = " " * depth for x in get_function_param: print(f"{indent} {x}") def print_function_variable(function_name, depth): if function_name in function_variable: get_function_variable = function_variable[function_name] indent = " " * depth for x in get_function_variable: print(f"{indent} {x}") def print_function_calls(caller, depth=0): global function_calls_record if caller in function_calls_record: return else: 
function_calls_record.append(caller) indent = " " * depth print(f"{indent} Caller: {caller}") print_function_param(caller, depth) print_function_variable(caller, depth) if caller in function_calls: for callee in function_calls[caller]: if callee in function_calls_record: return else: function_calls_record.append(callee) indent = " " * (depth + 1) print(f"{indent} Callee: {callee}") print_function_param(callee, depth+ 1) print_function_variable(caller, depth+ 1) if callee in function_calls: for x in function_calls[callee]: print_function_calls(x, depth + 2) # Iterate through all functions and record function call stacks function_calls_record = [] for caller in function_calls: function_calls_record = [] print_function_calls(caller) print("------------------------------------------------") total_call_record.append(function_calls_record) ``` output ``` ------------------------------------------------ Caller: calc_func(ee_s16 *, core_results *) Parameter Name: pdata | Parameter Type: ee_s16 * Parameter Name: res | Parameter Type: core_results * Variable Name:data|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:72:12 Variable Name:flag|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:81:16 Variable Name:retval|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:73:12 Variable Name:dtype|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:82:16 Variable Name:optype|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:74:12 Callee: core_bench_matrix(mat_params *, ee_s16, ee_u16) Parameter Name: crc | Parameter Type: ee_u16 Parameter Name: seed | Parameter Type: ee_s16 Parameter Name: p | Parameter Type: mat_params * Variable Name:data|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:72:12 Variable Name:flag|Variable 
Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:81:16 Variable Name:retval|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:73:12 Variable Name:dtype|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:82:16 Variable Name:optype|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_list_join.c:74:12 Caller: crc16(ee_s16, ee_u16) Parameter Name: crc | Parameter Type: ee_u16 Parameter Name: newval | Parameter Type: ee_s16 Callee: crcu16(ee_u16, ee_u16) Parameter Name: crc | Parameter Type: ee_u16 Parameter Name: newval | Parameter Type: ee_u16 Caller: crcu8(ee_u8, ee_u16) Parameter Name: crc | Parameter Type: ee_u16 Parameter Name: data | Parameter Type: ee_u8 Variable Name:i|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_util.c:167:11 Variable Name:carry|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_util.c:167:27 Variable Name:x16|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_util.c:167:18 Caller: matrix_test(ee_u32, MATRES *, MATDAT *, MATDAT *, MATDAT) Parameter Name: val | Parameter Type: MATDAT Parameter Name: B | Parameter Type: MATDAT * Parameter Name: N | Parameter Type: ee_u32 Parameter Name: A | Parameter Type: MATDAT * Parameter Name: C | Parameter Type: MATRES * Variable Name:crc|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:132:12 Variable Name:clipval|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:133:12 Callee: matrix_mul_matrix_bitextract(ee_u32, MATRES *, MATDAT *, MATDAT *) Parameter Name: B | Parameter Type: MATDAT * Parameter Name: N | Parameter Type: ee_u32 Parameter Name: A | Parameter Type: MATDAT * Parameter Name: C | Parameter Type: MATRES * Variable Name:crc|Variable 
Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:132:12 Variable Name:clipval|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:133:12 Callee: matrix_mul_vect(ee_u32, MATRES *, MATDAT *, MATDAT *) Parameter Name: B | Parameter Type: MATDAT * Parameter Name: N | Parameter Type: ee_u32 Parameter Name: A | Parameter Type: MATDAT * Parameter Name: C | Parameter Type: MATRES * Variable Name:crc|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:132:12 Variable Name:clipval|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:133:12 Callee: matrix_mul_const(ee_u32, MATRES *, MATDAT *, MATDAT) Parameter Name: val | Parameter Type: MATDAT Parameter Name: N | Parameter Type: ee_u32 Parameter Name: A | Parameter Type: MATDAT * Parameter Name: C | Parameter Type: MATRES * Variable Name:crc|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:132:12 Variable Name:clipval|Variable Location:/home/x213212/cpptest/cppcheck/addons/CoreMark-V5/demo/core_matrix.c:133:12 ------------------------------------------------ ```