# HITCONCTF 2022 - Meow Way We are given a 32-bit Windows executable. Lets load it into IDA and see what we can find: **INITIAL ENUMERATION** ```C= int __cdecl main(int argc, const char **argv, const char **envp) { char v4; // [esp+0h] [ebp-24h] int v5; // [esp+0h] [ebp-24h] int v6; // [esp+14h] [ebp-10h] int v7[2]; // [esp+18h] [ebp-Ch] BYREF v7[0] = -1; v7[1] = -1; if ( argc < 2 ) { sub_401340("Usage: %s <flag>\n", (char)*argv); exit(1); } if ( strlen(argv[1]) != 48 ) { sub_401340("Wrong length\n", v4); exit(1); } v6 = (int)argv[1]; dword_40544C(v6, v6 >> 31, v6, v6 >> 31, 196, 0, v7, (int)v7 >> 31); ++v6; dword_4053A8(v6, v6 >> 31, v6, v6 >> 31, 22, 0, v7, (int)v7 >> 31); ++v6; dword_4053B4(v6, v6 >> 31, v6, v6 >> 31, 142, 0, v7, (int)v7 >> 31); ++v6; dword_4053F0(v6, v6 >> 31, v6, v6 >> 31, 119, 0, v7, (int)v7 >> 31); ++v6; dword_405448(v6, v6 >> 31, v6, v6 >> 31, 5, 0, v7, (int)v7 >> 31); ++v6; dword_4053FC(v6, v6 >> 31, v6, v6 >> 31, 185, 0, v7, (int)v7 >> 31); ++v6; dword_405400(v6, v6 >> 31, v6, v6 >> 31, 13, 0, v7, (int)v7 >> 31); ++v6; dword_405410(v6, v6 >> 31, v6, v6 >> 31, 107, 0, v7, (int)v7 >> 31); ++v6; dword_4053F8(v6, v6 >> 31, v6, v6 >> 31, 36, 0, v7, (int)v7 >> 31); ++v6; dword_405430(v6, v6 >> 31, v6, v6 >> 31, 85, 0, v7, (int)v7 >> 31); ++v6; dword_4053D0(v6, v6 >> 31, v6, v6 >> 31, 18, 0, v7, (int)v7 >> 31); ++v6; dword_405434(v6, v6 >> 31, v6, v6 >> 31, 53, 0, v7, (int)v7 >> 31); ++v6; dword_40545C(v6, v6 >> 31, v6, v6 >> 31, 118, 0, v7, (int)v7 >> 31); ++v6; dword_405454(v6, v6 >> 31, v6, v6 >> 31, 231, 0, v7, (int)v7 >> 31); ++v6; dword_4053C0(v6, v6 >> 31, v6, v6 >> 31, 251, 0, v7, (int)v7 >> 31); ++v6; dword_4053E4(v6, v6 >> 31, v6, v6 >> 31, 160, 0, v7, (int)v7 >> 31); ++v6; dword_4053C4(v6, v6 >> 31, v6, v6 >> 31, 218, 0, v7, (int)v7 >> 31); ++v6; dword_405440(v6, v6 >> 31, v6, v6 >> 31, 52, 0, v7, (int)v7 >> 31); ++v6; dword_4053BC(v6, v6 >> 31, v6, v6 >> 31, 132, 0, v7, (int)v7 >> 31); ++v6; dword_4053AC(v6, v6 >> 31, v6, v6 >> 31, 
180, 0, v7, (int)v7 >> 31); ++v6; dword_405408(v6, v6 >> 31, v6, v6 >> 31, 200, 0, v7, (int)v7 >> 31); ++v6; dword_4053D8(v6, v6 >> 31, v6, v6 >> 31, 155, 0, v7, (int)v7 >> 31); ++v6; dword_4053B8(v6, v6 >> 31, v6, v6 >> 31, 239, 0, v7, (int)v7 >> 31); ++v6; dword_4053C8(v6, v6 >> 31, v6, v6 >> 31, 180, 0, v7, (int)v7 >> 31); ++v6; dword_4053E0(v6, v6 >> 31, v6, v6 >> 31, 185, 0, v7, (int)v7 >> 31); ++v6; dword_405418(v6, v6 >> 31, v6, v6 >> 31, 10, 0, v7, (int)v7 >> 31); ++v6; dword_4053EC(v6, v6 >> 31, v6, v6 >> 31, 87, 0, v7, (int)v7 >> 31); ++v6; dword_405414(v6, v6 >> 31, v6, v6 >> 31, 92, 0, v7, (int)v7 >> 31); ++v6; dword_405450(v6, v6 >> 31, v6, v6 >> 31, 254, 0, v7, (int)v7 >> 31); ++v6; dword_4053E8(v6, v6 >> 31, v6, v6 >> 31, 197, 0, v7, (int)v7 >> 31); ++v6; dword_4053D4(v6, v6 >> 31, v6, v6 >> 31, 106, 0, v7, (int)v7 >> 31); ++v6; dword_40541C(v6, v6 >> 31, v6, v6 >> 31, 115, 0, v7, (int)v7 >> 31); ++v6; dword_40542C(v6, v6 >> 31, v6, v6 >> 31, 73, 0, v7, (int)v7 >> 31); ++v6; dword_405444(v6, v6 >> 31, v6, v6 >> 31, 189, 0, v7, (int)v7 >> 31); ++v6; dword_405458(v6, v6 >> 31, v6, v6 >> 31, 17, 0, v7, (int)v7 >> 31); ++v6; dword_405420(v6, v6 >> 31, v6, v6 >> 31, 214, 0, v7, (int)v7 >> 31); ++v6; dword_4053B0(v6, v6 >> 31, v6, v6 >> 31, 143, 0, v7, (int)v7 >> 31); ++v6; dword_4053DC(v6, v6 >> 31, v6, v6 >> 31, 107, 0, v7, (int)v7 >> 31); ++v6; dword_405464(v6, v6 >> 31, v6, v6 >> 31, 10, 0, v7, (int)v7 >> 31); ++v6; dword_4053CC(v6, v6 >> 31, v6, v6 >> 31, 151, 0, v7, (int)v7 >> 31); ++v6; dword_405424(v6, v6 >> 31, v6, v6 >> 31, 171, 0, v7, (int)v7 >> 31); ++v6; dword_40543C(v6, v6 >> 31, v6, v6 >> 31, 78, 0, v7, (int)v7 >> 31); ++v6; dword_405404(v6, v6 >> 31, v6, v6 >> 31, 237, 0, v7, (int)v7 >> 31); ++v6; dword_405428(v6, v6 >> 31, v6, v6 >> 31, 254, 0, v7, (int)v7 >> 31); ++v6; dword_405460(v6, v6 >> 31, v6, v6 >> 31, 151, 0, v7, (int)v7 >> 31); ++v6; dword_40540C(v6, v6 >> 31, v6, v6 >> 31, 249, 0, v7, (int)v7 >> 31); ++v6; dword_4053F4(v6, v6 >> 
31, v6, v6 >> 31, 152, 0, v7, (int)v7 >> 31); dword_405438(v6 + 1, (v6 + 1) >> 31, v6 + 1, (v6 + 1) >> 31, 101, 0, v7, (int)v7 >> 31); v5 = memcmp(&unk_405018, argv[1], 0x30u); if ( v5 ) { sub_401340("Wrong\n", v5); exit(-1); } sub_401340("I know you know the flag!\n", 0); return 0; } ``` So looking at this decompilation of main, we see in lines 10-14: ```C= if ( argc < 2 ) { sub_401340("Usage: %s <flag>\n", (char)*argv); exit(1); } ``` It asks for our flag as an argument to the program. Then in lines 15-19: ```C= if ( strlen(argv[1]) != 48 ) { sub_401340("Wrong length\n", v4); exit(1); } ``` The program checks if the length of our input is 48. I should also mention that the flag format is `hitcon{...}`. Continuing on inspecting our main function, we see it calls a series of function pointers on `v6`, which starts at the beginning of our input and is incremented by 1 after each successive call. We can guess that this is doing a byte-by-byte check of our input. In line 115 we have a `memcmp` of our input against a static piece of data in memory, length 48: ```C= v5 = memcmp(&unk_405018, argv[1], 0x30u); ``` Looking at the raw bytes: ``` b"\x96P\xcf,\xeb\x9b\xaa\xfbS\xabs\xddl\x9e\xdb\xbc\xee\xab#\xd6\x16\xfd\xf1\xf0\xb9u\xc3(\xa2t}\xe3'\xd5\x95\\\xf5vu\xc9\x8c\xfbB\x0e\xbdQ\xa2\x98" ``` We can see that there are non-ASCII bytes, meaning our input is modified in place by these function pointer calls before it is compared. So to summarize, the program: * Takes an argument string of length 48 * Transforms our input, byte by byte, through multiple function pointers * Compares the transformed result to a static set of bytes * The comparison must succeed (`memcmp` returns 0) for our input to be the correct flag Let's move on to analyzing the function pointers in the program. **DEEPER ANALYSIS** Let's follow the first function pointer on line 21 of our main program: ``` dword_40544C(v6, v6 >> 31, v6, v6 >> 31, 196, 0, v7, (int)v7 >> 31); ``` IDA allows us to see XREFs (cross-references), i.e. which functions read or write a given memory address. 
``` .data:0040544C ; int (__cdecl *dword_40544C)(_DWORD, _DWORD, _DWORD, _DWORD, _DWORD, _DWORD, _DWORD, _DWORD) .data:0040544C dword_40544C dd ? ; DATA XREF: sub_401000+3↑w .data:0040544C ; _main+C8↑r ``` Let's look at `sub_401000`: ```asm= .text:00401000 ; void sub_401000() .text:00401000 sub_401000 proc near ; DATA XREF: .rdata:004030D4↓o .text:00401000 push ebp .text:00401001 mov ebp, esp .text:00401003 mov dword_40544C, offset unk_4031C0 .text:0040100D pop ebp .text:0040100E retn .text:0040100E sub_401000 endp ``` So it looks like the memory address `unk_4031C0` is loaded into our function pointer. Let's follow that again. Looking at the raw assembly we have the following: ```asm= .rdata:004031C0 loc_4031C0: ; DATA XREF: sub_401000+3↑o .rdata:004031C0 push 33h ; '3' .rdata:004031C2 call $+5 .rdata:004031C7 add dword ptr [esp], 5 .rdata:004031CB retf .rdata:004031CC ; --------------------------------------------------------------------------- .rdata:004031CC dec eax .rdata:004031CD xor eax, eax .rdata:004031CF db 65h .rdata:004031CF dec eax .rdata:004031D1 mov eax, [eax+60h] .rdata:004031D4 dec eax .rdata:004031D5 movzx eax, byte ptr [eax+2] .rdata:004031D9 mov ecx, [si+24h] .rdata:004031DD sbb al, 67h ; 'g' .rdata:004031DF mov [ecx], eax .rdata:004031E1 test eax, eax .rdata:004031E3 jnz short loc_4031FD .rdata:004031E5 mov edi, [si+24h] .rdata:004031E9 add al, 67h ; 'g' .rdata:004031EB mov esi, [esp+0Ch] .rdata:004031EF mov ecx, [si+24h] .rdata:004031F3 adc al, 67h ; 'g' .rdata:004031F5 add cl, [esi] .rdata:004031F7 xor cl, 0BAh .rdata:004031FA mov [bx], cl .rdata:004031FD ``` The initial set of instructions looks quite interesting, especially the `retf` instruction on line 5. It is a far return: besides the return address (the address pushed by `call $+5`, adjusted by 5 so it points just past the `retf`), it also pops a new code segment selector, here the `33h` pushed on line 2. On 64-bit Windows, selector `0x33` is the 64-bit code segment, so execution continues at the next instruction but in 64-bit mode, which is also why IDA's 32-bit disassembly of the following bytes looks like nonsense. This function prologue matches a well-known technique called `Heaven's Gate`. 
This link has a good explanation of Heaven's Gate as well as an example using it: https://blog.attify.com/flare-on-5-writeup-part5/ TL;DR: this technique lets a 32-bit process running under WoW64 on 64-bit Windows switch the processor from 32-bit compatibility mode into 64-bit mode, by doing a far transfer to the 64-bit code segment (selector `0x33`), so that it can execute 64-bit code directly. This also means if we try to use a debugger like x32dbg or even IDA's builtin debugger (if you have it), it won't be able to follow the mode switch correctly when it executes the `retf` instruction we mentioned above. You can try it yourself to confirm. However, as the link describes, we can use a free tool to debug our executable: WinDbg! Launch your executable with advanced settings as shown below. We are using the 64-bit debugger so that it can follow the switch into 64-bit mode. ![](https://i.imgur.com/TnCSK9j.png) Click on the View > Modules tab to see the base address of your executable, and use your disassembler to find the relative offset of the function pointer target we are looking at. Then add that offset to the base address and enter the result in the `address` box in the `Disassembly` tab as shown below: ![](https://i.imgur.com/Hju4vNd.png) Then in the command tab we simply type `bp address` to set a breakpoint right at the beginning of the code the function pointer calls. 
Then we type `g` to continue program execution until we reach our breakpoint: ![](https://i.imgur.com/fSHGqqd.png) Now we step execution by typing `p` into the command box until we reach our retf instruction: ![](https://i.imgur.com/p8rw8Ew.png) Now one more step over that instruction and we shall see the instructions below change into 64-bit mode: ![](https://i.imgur.com/6vczUpQ.png) Notice the instructions have changed and are different than the static disassembly we viewed in IDA earlier: ```asm= 001e31cc 4831c0 xor rax, rax 001e31cf 65488b4060 mov rax, qword ptr gs:[rax+60h] 001e31d4 480fb64002 movzx rax, byte ptr [rax+2] 001e31d9 678b4c241c mov ecx, dword ptr [esp+1Ch] 001e31de 678901 mov dword ptr [ecx], eax 001e31e1 85c0 test eax, eax 001e31e3 7518 jne 00000000001E31FD 001e31e5 678b7c2404 mov edi, dword ptr [esp+4] 001e31ea 678b74240c mov esi, dword ptr [esp+0Ch] 001e31ef 678b4c2414 mov ecx, dword ptr [esp+14h] 001e31f4 67020e add cl, byte ptr [esi] 001e31f7 80f1ba xor cl, 0BAh 001e31fa 67880f mov byte ptr [edi], cl 001e31fd e800000000 call 00000000001E3202 001e3202 c744240423000000 mov dword ptr [rsp+4], 23h 001e320a 8304240d add dword ptr [rsp], 0Dh 001e320e cb retf ``` Now we can clearly see a cleaner disassembly and can finally begin reversing this logic. **REVERSING** We can already see references to the stack in the disassembly above. Keep in mind the original program is in 32-bit, so those would be arguments to our function. 
Let's look at the main function right before our first function call: ```C= dword_40544C(v6, v6 >> 31, v6, v6 >> 31, 196, 0, v7, (int)v7 >> 31); ``` So our stack would be set up like the following: ``` v6 = current byte of input v7 = random piece of memory esp + 4: v6 esp + 8: v6 >> 31 esp + 0xc: v6 esp + 0x10: v6 >> 31 esp + 0x14: 196 esp + 0x18: 0 esp + 0x1c: v7 esp + 0x20: v7 >> 31 ``` Let's refer back to the 64-bit disassembly and understand what that function pointer is doing to the current byte of our input: * It reads the byte at offset 2 of the structure pointed to by `gs:[0x60]` and stores it through the pointer passed as v7 (`[esp+1Ch]`). In 64-bit mode `gs:[0x60]` is the PEB and offset 2 is its `BeingDebugged` flag, so this is an anti-debugging check: if the value is non-zero, the `jne` skips the transformation entirely. Keep this in mind when stepping through in a debugger (clear `eax` before the `test`, or patch the flag). * edi holds a pointer to our current byte (`[esp+4]`) * esi also holds a pointer to our current byte (`[esp+0Ch]`) * ecx holds the static value 196 (`[esp+14h]`) * It does an 8-bit addition of our input byte and the static value, then xors that result with another static value. * The result is then written back over our current input byte! So we can boil the function pointer down to the following equation: ``` ((add_value + curr_input) & 0xff) ^ xor_value ``` Where `add_value` is the 5th parameter passed in and `xor_value` is the immediate value in the disassembly that the result is xor'd against. If you repeat the steps we did for the other function pointers you'll notice that the exact same pattern is there, except the 2 values mentioned above will be different. Also, the instruction on line 11 of the 64-bit disassembly of our function pointer will sometimes be different, using a `sub` instead of an `add`: `sub cl, byte ptr [esi]` Which makes our formula the following: ``` ((add_value - curr_input) & 0xff) ^ xor_value ``` Now that we know what the function pointers do to our input, we can grab the flag simply through brute force. The set of printable ASCII characters is small, and the operation applied to our input can be written easily in Python or C (I'll use Python). So, the hardest part of this challenge will be collecting the `add_value`, the `xor_value` and the operation (`add` or `sub`) for each of the 48 function pointers. 
You have some options: * Patching the program to retrieve the values dynamically through some kind of print or log statement * Using a tool like [capstone](https://www.capstone-engine.org/lang_python.html) or [pwntools](https://github.com/Gallopsled/pwntools) to disassemble each stub as 64-bit code and extract these values from the output. * Manual labor! However you choose to do it, the script below should be similar to what you come up with to brute force (`vals` holds the add/sub constants, `xor_vals` the xor constants, and `a_s` is 1 for an `add` stub and 0 for a `sub` stub): ```python= import string finale = b"\x96P\xcf,\xeb\x9b\xaa\xfbS\xabs\xddl\x9e\xdb\xbc\xee\xab#\xd6\x16\xfd\xf1\xf0\xb9u\xc3(\xa2t}\xe3'\xd5\x95\\\xf5vu\xc9\x8c\xfbB\x0e\xbdQ\xa2\x98" vals = [196, 22, 142, 119, 5, 185, 13, 107, 36, 85, 18, 53, 118, 231, 251, 160, 218, 52, 132, 180, 200, 155, 239, 180, 185, 10, 87, 92, 254, 197, 106, 115, 73, 189, 17, 214, 143, 107, 10, 151, 171, 78, 237, 254, 151, 249, 152] xor_vals = [186, 47, 205, 246, 159, 208, 34, 247, 208, 31, 168, 61, 199, 165, 71, 104, 215, 74, 150, 145, 46, 25, 197, 227, 136, 189, 78, 147, 19, 241, 204, 71, 171, 201, 72, 43, 9, 80, 79, 233, 192, 94, 239, 139, 133, 203, 85] a_s = [1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0] charset = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~" flag = "" counter = 0 while (counter < 47): if a_s[counter] == 1: for c in charset: g = ((c + vals[counter]) & 0xff) ^ xor_vals[counter] if g == finale[counter]: flag += chr(c) counter += 1 break elif a_s[counter] == 0: for c in charset: g = ((vals[counter] - c) & 0xff) ^ xor_vals[counter] if g == finale[counter]: flag += chr(c) counter += 1 break print(flag, counter) ``` Note that the loop only covers the first 47 bytes (`vals` and `xor_vals` hold 47 entries each), so the script recovers the flag without its final character; that last character is the closing `}` dictated by the flag format. Appending it, we get the following flag as the answer: ``` hitcon{___7U5T_4_S1mpIE_xB6_M@G1C_4_mE0w_W@y___} ```