# Writeup - Hash ```python def sha42(s: bytes, rounds=42): out = [0]*21 for round in range(rounds): for c in range(len(s)): if config[((c//21)+round)%len(config)][c%21] == 1: out[(c+round)%21] ^= s[c] return bytes(out).hex() ``` By examining the sha42() function, we see that each byte of the output is the xor-sum of some subset of the input bytes. For example, `out[0] = s[0] xor s[4] xor s[5] xor s[7] xor ...`, `out[1] = ...`, etc Since we have a bunch of xor-equations, the idea here is that we can combine these equations and move around the variables, so that we end up with something like: `s[0] = out[0] xor out[6] xor out[20] xor out[11]`, etc And we can hence recover each byte of the input `s`. There are many ways to implement this, but the way I chose to implement my solution was sort of based on the vector-basis-finding algorithm in [this blog](https://codeforces.com/blog/entry/68953). I highly encourage you to read it, but the TL;DR is, given a list of vectors, you can construct another list of vectors that form a "triangle shape" (linearly indepenent) as such: ```python # warning: pseudocode, not actual python vectors = ["1100....", "0101....", ...] # list of vectors new_vectors = list() for vector in vectors: for prev_vector in new_vectors: # if vector contains '1' at leading bit of prev_vector, use xor to cancel it out if vector[prev_vector.index('1')] == '1': vector = xor(vector, prev_vector) new_vectors.append(vector) new_vectors.sort(lambda x: -x.index('1')) # make sure we xor in the correct order for line in new_vectors: print(line) # should give a triangle-shape ``` The idea is to represent each equation in the form of a vector. E.g. `out[0] = s[0] xor s[4] xor s[5]` is represented by the vector `(1, 0, 0, 0, 1, 1, ...)`, where a '1' in position 'i' means that `s[i]` is involved in the xor-equation, and '0' means `s[i]` is not. We construct the vectors required, then use a similar technique to the one described in the blog above to get a set of linearly independent vectors. (Note: we must also keep track of the "path" to get to each vector.) For example, if equation 4 is `out[4] = s[2] xor s[3]` and equation 7 is `out[7] = s[2]`, then by combining these equations, we are able to reach the equation `s[3] = out[4] xor out[7]` by taking the path `[4, 7]`. ```python # outs contains the vector representation of the original equations for different lengths of input s. outs = [['101100010100101', '000001100101100', '000010111001101', '111001010111010', '000111000001010', '101111101100111', '001110101000011', '001111101111100', '001100111110000', '101101110011100', '001101111110011', '010110000101100', '100100100101010', '111101111001001', '111011000000110', '001000100100111', '010001011010001', '011000010000001', '110100111010000', '010000010011001', '010100111001010'], ['1101100010100101', '1000001100101100', '1000010111001101', '0111001010111010', '1000111000001010', '0101111101100111', '0001110101000011', '1001111101111100', '0001100111110000', '0101101110011100', '1001101111110011', '1010110000101100', '0100100100101010', '1111101111001001', '1111011000000110', '1001000100100111', '0010001011010001', '0011000010000001', '0110100111010000', '1010000010011001', '0010100111001010'], ['01101100010100101', '01000001100101100', '11000010111001101', '10111001010111010', '01000111000001010', '00101111101100111', '00001110101000011', '01001111101111100', '10001100111110000', '10101101110011100', '11001101111110011', '01010110000101100', '00100100100101010', '11111101111001001', '01111011000000110', '11001000100100111', '00010001011010001', '00011000010000001', '00110100111010000', '11010000010011001', '10010100111001010'], ['001101100010100101', '101000001100101100', '111000010111001101', '110111001010111010', '101000111000001010', '000101111101100111', '000001110101000011', '001001111101111100', '110001100111110000', '010101101110011100', '011001101111110011', '101010110000101100', '100100100100101010', '011111101111001001', '101111011000000110', '111001000100100111', '000010001011010001', '000011000010000001', '000110100111010000', '111010000010011001', '010010100111001010'], ['0001101100010100101', '0101000001100101100', '0111000010111001101', '1110111001010111010', '1101000111000001010', '0000101111101100111', '1000001110101000011', '0001001111101111100', '0110001100111110000', '1010101101110011100', '1011001101111110011', '1101010110000101100', '0100100100100101010', '1011111101111001001', '1101111011000000110', '1111001000100100111', '1000010001011010001', '0000011000010000001', '0000110100111010000', '0111010000010011001', '1010010100111001010'], ['10001101100010100101', '10101000001100101100', '10111000010111001101', '01110111001010111010', '11101000111000001010', '10000101111101100111', '01000001110101000011', '10001001111101111100', '10110001100111110000', '11010101101110011100', '11011001101111110011', '11101010110000101100', '10100100100100101010', '01011111101111001001', '11101111011000000110', '01111001000100100111', '11000010001011010001', '00000011000010000001', '10000110100111010000', '10111010000010011001', '01010010100111001010']] config = [[int(a) for a in n.strip()] for n in open(r"C:\Users\kenne\Downloads\jbox.txt").readlines()] def xor(x, y): assert(len(x) == len(y)) return ''.join('1' if p != q else '0' for p, q in zip(x, y)) def process(out): n = len(out[0]) # number of bits res = [list() for _ in range(n)] out = [[bits, [i]] for i, bits in enumerate(out)] # contains vector, path out.sort(key = lambda x: 1000 if '1' not in x[0] else -x[0].index('1')) for i in range(len(out)): bits, path = out[i] for j in range(i): if '1' not in out[j][0]: continue pos = out[j][0].index('1') if bits[pos] == '1': bits = xor(bits, out[j][0]) for next_bit in out[j][1]: if next_bit in path: path.remove(next_bit) else: path.append(next_bit) out[i] = [bits, path] out.sort(key = lambda x: -int(x[0], 2)) # sort to make a triangle shape # note: here, out will look something like this: # V this is the vector # ['10000000101010100110', [0, 20, 13]] <- and this is the path to reach this vector # ['01110100001000111011', [3, 17]] # ['00110101111101111000', [6, 3, 17]] # ['00011101011000001011', [13, 6, 17]] # ['00001101001100011100', [15, 3, 17]] # ['00000100010001000010', [5, 15, 17, 20, 4, 6]] # ['00000011000010000001', [17]] # ['00000001000001100101', [4, 6, 1]] # ['00000000100000000001', [8, 15, 7, 1, 2, 17, 20]] # ['00000000010000110010', [9, 0, 13, 5, 4, 6, 8, 7, 1, 2, 17]] # ... (more lines below) # as you can see, the vectors now are all linearly independent (in a triangle shape) # and now its relatively easy to make it to an almost row-echelon form for i in reversed(range(n)): bits, path = out[i] if '1' not in bits: continue pos = bits.index('1') for j in range(i): if out[j][0][pos] == '1': out[j][0] = xor(bits, out[j][0]) for next_bit in path: if next_bit in out[j][1]: out[j][1].remove(next_bit) else: out[j][1].append(next_bit) # now our output looks like this # ['10000000000000000000', [7, 11, 13, 9, 2, 0, 15, 14, 17, 20, 18, 10, 6, 5]] # ['01000000000000000010', [7, 8, 13, 5, 17, 2, 10, 6, 4, 18, 14]] # ['00100000000000000010', [2, 10, 12, 6, 4, 0, 1, 18, 14]] # ['00010000000000000000', [11, 4, 0, 9, 13, 5, 1, 17, 20, 18, 14]] # ['00001000000000000010', [3, 2, 13, 5, 1, 17, 10, 20, 18, 12, 14, 6, 8]] # ['00000100000000000000', [4, 18, 12, 0, 14, 9, 7, 6, 8]] # ['00000010000000000000', [7, 12, 15, 5, 4, 18, 20, 14]] # many vectors have 1 single bit, which means we can recover one input byte # e.g. consider the first row of our output # ['10000000000000000000', [7, 11, 13, 9, ...] # -> s[0] = out[7] xor out[11] xor out[13] xor ..., where out is the original output return out ``` However, notice that some rows of our output consists of 2 '1' bits, like `01000000000000000010` and `00001000000000000010`. This means we cannot uniquely determine some of these values (Here, the values include `s[1], s[5], and s[18]`). However, we can just brute-force `s[18]`, and check if the resulting input has the correct hash. Here is the remainder of the solve script: (Note: I solved separately based on the possible lengths of the input, but this was probably unnecessary.) ```python outs = [process(outs[i]) for i in range(len(outs))] next_outs = list() for out in outs: res = list() for bits, path in out: if '1' not in bits: continue res.append((bits, path)) next_outs.append(res) outs = next_outs[::] outs[4] = outs[4][:-1] + [('0000000000000000000', [])] + outs[4][-1:] outs[5] = outs[5][:-1] + [('00000000000000000000', [])] + outs[5][-1:] def solve2(seq, n, ignore_pos, c, out): res = [0 for _ in range(n)] res[ignore_pos] = c for i in reversed(range(n)): if i == ignore_pos: continue c = 0 for x in out[i][1]: if x >= len(seq): continue c ^= seq[x] if out[i][0].count('1') > 1: c ^= res[ignore_pos] res[i] = c return res[::-1] def solve(seq, n): seq = bytes.fromhex(seq) # n is guessed length pos = n - 15 ignore_pos = n - 2 if pos == 4 or pos == 5: res = list() for c in printable.encode(): res.append(solve2(seq, n, ignore_pos, c, outs[pos])) return res else: # can uniquely determine the inverse res = list() for bits, path in outs[pos]: c = 0 for x in path: if x >= len(seq): continue c ^= seq[x] res.append(c) return [bytes(res)[::-1]] def sha42(s: bytes, rounds=42): out = [0]*21 for round in range(rounds): for c in range(len(s)): if config[((c//21)+round)%len(config)][c%21] == 1: out[(c+round)%21] ^= s[c] return bytes(out).hex() from pwn import * from string import printable context.log_level = 'debug' r = remote('hash.chal.imaginaryctf.org', 1337) for _ in range(50): r.recvuntil(b'sha42(password) = ') seq = r.recvline().strip().decode() res = list() for i in range(15, 21): for test in solve(seq, i): res.append(test) res = [row for row in res if all(c in printable.encode() for c in row) and sha42(row) == seq] r.sendline(bytes(res[0]).hex().encode()) ```