# h1 Angr 符号执行学习(一) ## 前言:最近对自动化二进制漏洞挖掘哼感兴趣,百度了下,发现自动化二进制漏洞挖掘的一个核心技术之一Angr符号执行,感觉很有意思再加上之前比赛也有很多关于Angr的题目,angr也可以对抗代码混淆ollvm这些,感觉以后用处蛮大的,于是就学了学,这里记录下笔记,发到博客里和微信公众号里供大家学习,学了一天,才刚学到07文件符号执行化,就分别总结成了一、二、三篇,太菜了 ## 一、00 我们要用到两个模块,一个angr和一个sys angr进行符号执行,sys来进行用户输入输出 我们第一步要进行angr脚本去跑约束逆向的时候, 第一步: 1.创建文件项目: proj=angr.Project(binary,auto_load_libs=False),不加载libc 2.初始化地址块 state=proj.factory.entry_state() 这里的块初始化到了程序的start 块 3.初始化以后,我们要对这个块进行模拟 sim=proj.factory.simgr(state) 4.模拟后,开始寻找正确的路径 ok_address=0x8048675 sim.explore(find=ok_address) 这里是我们输出正确的路径 5.搜索到路径后,把状态放到sim.found[0]里,然后从这个状态里取出来即可 if sim.found: solution_state=sim.found[0] print(solution_state.posix.dumps(sys.stdin.fileno()).decode()) ```python import angr import sys def solver(): binary="./00_angr_find" proj=angr.Project(binary,auto_load_libs=False) state=proj.factory.entry_state() sim=proj.factory.simgr(state) ok_address=0x8048675 sim.explore(find=ok_address) if sim.found: solution_state=sim.found[0] print(solution_state.posix.dumps(sys.stdin.fileno()).decode()) else: print("no!!!!") if __name__=='__main__': solver() ``` ## 二、01 这个案例,前面部分跟00一样,在寻找路径的时候,explore的时候加个avoid参数,这个参数就避开错误代码块的参数 ```python import angr import sys def solver(): binary="./01_angr_avoid" proj=angr.Project(binary) state=proj.factory.entry_state() sim=proj.factory.simgr(state) ok_address=0x80485E0 bi_kai=0x80485A8 sim.explore(find=ok_address,avoid=bi_kai) if sim.found: solution_state=sim.found[0] print(solution_state.posix.dumps(sys.stdin.fileno()).decode()) else: print("no !!!!!") if __name__=='__main__': solver() ``` ## 三、02 这里刚上面一样,唯一的区别就是,自己写了一个避开的函数和正确的函数,赋给了find和avoid,效果都是一样的 ```python import angr import sys def find_path(state): return b'Good Job.' in state.posix.dumps(sys.stdout.fileno()) def avoid_path(state): return b'Try again.' in state.posix.dumps(sys.stdout.fileno()) def solver(): binary="./02_angr_find_condition" proj=angr.Project(binary) init_state=proj.factory.entry_state() sim=proj.factory.simgr(init_state) ok_address=0x08048715 sim.explore(find=find_path,avoid=avoid_path) if sim.found: solution_state=sim.found[0] print(solution_state.posix.dumps(sys.stdin.fileno()).decode()) else: raise Exception('Could not find the solution') if __name__=='__main__': solver() ``` ## 四、03 regs 寄存器 这里就有特点了,对寄存器的操作 我们看题目代码 ```python int __cdecl main(int argc, const char **argv, const char **envp) { __int64 user_input; // rax int v5; // [esp+4h] [ebp-14h] int v6; // [esp+8h] [ebp-10h] int v7; // [esp+Ch] [ebp-Ch] int v8; // [esp+Ch] [ebp-Ch] printf("Enter the password: "); user_input = get_user_input(); v7 = HIDWORD(user_input); v5 = complex_function_1(user_input); v6 = complex_function_2(); v8 = complex_function_3(v7); if ( v5 || v6 || v8 ) puts("Try again."); else puts("Good Job."); return 0; } ``` get_user_input: ```python int get_user_input() { int v1; // [esp+0h] [ebp-18h] BYREF int v2; // [esp+4h] [ebp-14h] BYREF int v3[4]; // [esp+8h] [ebp-10h] BYREF v3[1] = __readgsdword(0x14u); __isoc99_scanf("%x %x %x", &v1, &v2, v3); return v1; } ``` 通过上面两行代码我们发现,在getuserinput这个函数里,对v1、v2、v3进行了输入,然后只用了v1,然后看汇编 ```inter 0804892A 68 93 8A 04 08 push offset aXXX ; "%x %x %x" .text:0804892F E8 9C FA FF FF call ___isoc99_scanf .text:0804892F .text:08048934 83 C4 10 add esp, 10h .text:08048937 8B 4D E8 mov ecx, [ebp+var_18] .text:0804893A 89 C8 mov eax, ecx .text:0804893C 8B 4D EC mov ecx, [ebp+var_14] .text:0804893F 89 CB mov ebx, ecx .text:08048941 8B 4D F0 mov ecx, [ebp+var_10] .text:08048944 89 CA mov edx, ecx .text:08048946 90 nop ``` 发现三个值传给 了eax ebx ecx,我们可以在输入函数之后,利用向量给 initial_state.regs.eax ebx edx,进行向量的赋值,在后面进行状态查看即可,分析完后,我这里对关键的exp 代码进行分析 (1).start_address=0x08048980 initial_state=proj.factory.blank_state(addr=start_address) 这里在输入函数之后开始空白块,我这里理解的空白块是从这里开始进行向下执行,并且绕过程序自己的输入函数,我们自己进行对reg来操作赋值 (2). ```python passwd_0=claripy.BVS('passwd_0',32) passwd_1=claripy.BVS('passwd_1',32) passwd_2=claripy.BVS('passwd_2',32) initial_state.regs.eax=passwd_0 initial_state.regs.ebx=passwd_1 initial_state.regs.edx=passwd_2 ``` 用claripy进行向量的创建,创建3个向量,因为是int32位,所以这里后面写成了32,然后再把三个向量导入到eax ebx edx (3). ```python simgr=proj.factory.simgr(initial_state) simgr.explore(find=find_path,avoid=avoid_path) ``` 然后进行模拟执行并且查询 (4) . 然后通过simgr.found获取解决后的state(状态), ``` solution0=solution_state.solver.eval(passwd_0) solution1=solution_state.solver.eval(passwd_1) solution2=solution_state.solver.eval(passwd_2) ``` 然后再用solution_state.solver.eval获取状态值,获取到状态值打印即可 ```python import angr import sys import claripy def find_path(state): return b'Good Job.' in state.posix.dumps(sys.stdout.fileno()) def avoid_path(state): return b'Try again.' in state.posix.dumps(sys.stdout.fileno()) def solver(): binary="./03_angr_symbolic_registers" proj=angr.Project(binary) start_address=0x08048980 initial_state=proj.factory.blank_state(addr=start_address) passwd_0=claripy.BVS('passwd_0',32) passwd_1=claripy.BVS('passwd_1',32) passwd_2=claripy.BVS('passwd_2',32) initial_state.regs.eax=passwd_0 initial_state.regs.ebx=passwd_1 initial_state.regs.edx=passwd_2 simgr=proj.factory.simgr(initial_state) simgr.explore(find=find_path,avoid=avoid_path) if simgr.found: solution_state=simgr.found[0] solution0=solution_state.solver.eval(passwd_0) solution1=solution_state.solver.eval(passwd_1) solution2=solution_state.solver.eval(passwd_2) print('passwd0:{}'.format(hex(solution0))) print('passwd1:{}'.format(hex(solution1))) print('passwd2:{}'.format(hex(solution2))) else: raise Exception("Could not find the solution!") if __name__=='__main__': solver() ``` ## 五、04 stack 这里是对stack的一个符号化,对栈的化,就得注意栈的分布了,esp和ebp寄存器这些 我们还是选关键代码进行分析 (1)。 ```python passwd0=claripy.BVS("passwd0",32) #这里创建了向量 passwd1=claripy.BVS("passwd1",32) # initial_state.regs.ebp=initial_state.regs.esp initial_state.regs.esp-=0x8 #scanf initial_state.stack_push(passwd0) initial_state.stack_push(passwd1) ``` 第一步先把寄存器esp给了ebp,然后让esp-0x8,这里减0x8的原因,我推测有两个原因,一是联系上下文、二是开辟栈空间 然后把我们创建的向量放到stack上即可,下面代码还是老一套,solver.eval(向量状态) ```python import angr import sys import claripy def solver(): binary="./04_angr_symbolic_stack" proj=angr.Project(binary) start_address=0x8048697 initial_state=proj.factory.blank_state(addr=start_address) passwd0=claripy.BVS("passwd0",32) passwd1=claripy.BVS("passwd1",32) initial_state.regs.ebp=initial_state.regs.esp initial_state.regs.esp-=0x8 #scanf initial_state.stack_push(passwd0) initial_state.stack_push(passwd1) #initial_state.regs.esp-=12 #.text:08048682 8D 45 F0 lea eax, [ebp+var_10]the relative position of esp when return from scanf() simgr=proj.factory.simgr(initial_state) simgr.explore(find=0x80486E1,avoid=0x80486CF) if simgr.found: solution_state=simgr.found[0] solution0=solution_state.solver.eval(passwd0) solution1=solution_state.solver.eval(passwd1) print("passwd0:{}".format(hex(solution0))) print("passwd1:{}".format(hex(solution1))) else: raise Exception("Colindd not !!!!") if __name__=='__main__': solver() ``` 六、05 me'mory 这道题是对memory内存进行符号化 我们看下源代码: ```c int __cdecl main(int argc, const char **argv, const char **envp) { int i; // [esp+Ch] [ebp-Ch] memset(user_input, 0, 0x21u); printf("Enter the password: "); __isoc99_scanf("%8s %8s %8s %8s", user_input, &unk_A1BA1C8, &unk_A1BA1D0, &unk_A1BA1D8); for ( i = 0; i <= 31; ++i ) *(_BYTE *)(i + 0xA1BA1C0) = complex_function(*(char *)(i + 0xA1BA1C0), i); if ( !strncmp(user_input, "NJPURZPCDYEAXCSJZJMPSOMBFDDLHBVN", 0x20u) ) puts("Good Job."); else puts("Try again."); return 0; } ``` 通过以上代码,我们发现是往内存bss段里读入4个8bytes,所以这里我们利用一个为内存赋值的语句 init_state.memory.store(address,passwd0) 这个是把passwd0赋值给address这个地址的里 然后我们创建4个变量并且把这个四个变量放到读入的内存里 ```python address=0xA1BA1C0 init_state.memory.store(address,passwd0) #放到 init_state.memory.store(address+0x8,passwd1) init_state.memory.store(address+0x10,passwd2) init_state.memory.store(address+0x18,passwd3) ``` 这里注意字节的增长 然后下面就是按照之前的方法获取相应的状态值即可 ```python import angr import sys import claripy def solver(): binary="./05_angr_symbolic_memory" proj=angr.Project(binary) start_address=0x8048601 init_state=proj.factory.blank_state(addr=start_address) passwd0=claripy.BVS("passwd0",64) passwd1=claripy.BVS("passwd1",64) passwd2=claripy.BVS("passwd2",64) passwd3=claripy.BVS("passwd3",64) address=0xA1BA1C0 init_state.memory.store(address,passwd0) init_state.memory.store(address+0x8,passwd1) init_state.memory.store(address+0x10,passwd2) init_state.memory.store(address+0x18,passwd3) simgr=proj.factory.simgr(init_state) simgr.explore(find=0x804866A,avoid=0x8048658) if simgr.found: solution_state=simgr.found[0] solution0=solution_state.solver.eval(passwd0,cast_to=bytes) solution1=solution_state.solver.eval(passwd1,cast_to=bytes) solution2=solution_state.solver.eval(passwd2,cast_to=bytes) solution3=solution_state.solver.eval(passwd3,cast_to=bytes) print("passwd0:{}".format(solution0)) print("passwd1:{}".format(solution1)) print("passwd2:{}".format(solution2)) print("passwd3:{}".format(solution3)) raise Exception("couln not !!!!!!!") if __name__=='__main__': solver() ``` ## 七、06 动态memory 这里是动态的chunk,malloc出来的,这里我们在动态里进行伪造fakechunk,然后再往这个伪造的chunk进行放入向量即可 先看下源代码: ```c int __cdecl main(int argc, const char **argv, const char **envp) { char *v3; // ebx char *v4; // ebx int v6; // [esp-10h] [ebp-1Ch] int i; // [esp+0h] [ebp-Ch] buffer0 = (char *)malloc(9u); buffer1 = (char *)malloc(9u); memset(buffer0, 0, 9u); memset(buffer1, 0, 9u); printf("Enter the password: "); __isoc99_scanf("%8s %8s", buffer0, buffer1, v6); for ( i = 0; i <= 7; ++i ) { v3 = &buffer0[i]; *v3 = complex_function(buffer0[i], i); v4 = &buffer1[i]; *v4 = complex_function(buffer1[i], i + 32); } if ( !strncmp(buffer0, "UODXLZBI", 8u) && !strncmp(buffer1, "UAORRAYF", 8u) ) puts("Good Job."); else puts("Try again."); free(buffer0); free(buffer1); return 0; } ``` 获取buffer0和buffer1的地址对这个地址进行fake即可 以下是我伪造的代码: ```python buf0=0xABCC8A4 buf1=0xABCC8AC fake_buf0=0xAAAAA10 fake_buf1=0xAAAAA20 p1=claripy.BVS("p1",64) p2=claripy.BVS("p2",64) init_state.memory.store(buf0,fake_buf0,endness=proj.arch.memory_endness) init_state.memory.store(buf1,fake_buf1,endness=proj.arch.memory_endness) init_state.memory.store(fake_buf0,p1) init_state.memory.store(fake_buf1,p2) ``` 然后下一步一把梭获取状态值即可 ```python import angr import sys import claripy def find_path(state): return b'Good Job.' in state.posix.dumps(sys.stdout.fileno()) def avoid_path(state): return b'Try again.' in state.posix.dumps(sys.stdout.fileno()) def solver(): binary="./06_angr_symbolic_dynamic_memory" proj=angr.Project(binary) start_address=0x8048699 init_state=proj.factory.blank_state(addr=start_address) buf0=0xABCC8A4 buf1=0xABCC8AC fake_buf0=0xAAAAA10 fake_buf1=0xAAAAA20 p1=claripy.BVS("p1",64) p2=claripy.BVS("p2",64) init_state.memory.store(buf0,fake_buf0,endness=proj.arch.memory_endness) init_state.memory.store(buf1,fake_buf1,endness=proj.arch.memory_endness) init_state.memory.store(fake_buf0,p1) init_state.memory.store(fake_buf1,p2) simgr=proj.factory.simgr(init_state) simgr.explore(find=find_path,avoid=avoid_path) if simgr.found: sol_state=simgr.found[0] p1=sol_state.solver.eval(p1,cast_to=bytes) p2=sol_state.solver.eval(p2,cast_to=bytes) print("p1:{}".format(p1)) print("p2:{}".format(p2)) else: raise Exception("Colud not!!!!!!") if __name__=='__main__': solver() ``` 八、07 符号fiile文件 这个题是对file文件进行符号化 我们看下题目代码: ```c int __cdecl __noreturn main(int argc, const char **argv, const char **envp) { int i; // [esp+Ch] [ebp-Ch] memset(buffer, 0, sizeof(buffer)); printf("Enter the password: "); __isoc99_scanf("%64s", buffer); ignore_me((int)buffer, 0x40u); memset(buffer, 0, sizeof(buffer)); fp = fopen("OJKSQYDP.txt", "rb"); fread(buffer, 1u, 0x40u, fp); fclose(fp); unlink("OJKSQYDP.txt"); for ( i = 0; i <= 7; ++i ) *(_BYTE *)(i + 0x804A0A0) = complex_function(*(char *)(i + 0x804A0A0), i); if ( strncmp(buffer, "AQWLCTXB", 9u) ) { puts("Try again."); exit(1); } puts("Good Job."); exit(0); } ``` 我们看到,这道打开了一个文件。在这道题的基础上,我们可以使用angr模拟文件从而进行文件内容符号化,我们用到了几个关键语句 ```python file_size=0x40 filename='OJKSQYDP.txt' password=claripy.BVS('password',file_size*8) file_name=angr.storage.SimFile(filename,content=password,size=file_size) init_state.fs.insert(filename,file_name) ``` 我们利用angr.storage.SimFile模拟文件,第一个参数为文件名,第二个为我们的向量,第三个为题目本身的读入的size,然后模拟完后,我们再对文件系统进行载入init_state.fs.insert(filename,file_name)即可实现文件符号化 然后剩下就是用angr进行模拟,获取状态,获取向量状态值,一把梭 ```python import angr import sys import claripy def isGood(state): return b'Good Job.' in state.posix.dumps(1) def isBad(state): return b'Try again.' in state.posix.dumps(1) def solver(): binary="./07_angr_symbolic_file" proj=angr.Project(binary) start_address=0x80488EA init_state=proj.factory.blank_state(addr=start_address) file_size=0x40 filename='OJKSQYDP.txt' password=claripy.BVS('password',file_size*8) file_name=angr.storage.SimFile(filename,content=password,size=file_size) init_state.fs.insert(filename,file_name) simgr=proj.factory.simgr(init_state) simgr.explore(find=isGood,avoid=isBad) if simgr.found: sol_state=simgr.found[0] passwd=sol_state.solver.eval(password,cast_to=bytes) print("passwd:{}".format(passwd)) else: raise Exception("Colund not!!!!!!") if __name__=="__main__": solver() ``` 未完持续