# Writeup - Hash
```python
def sha42(s: bytes, rounds=42):
out = [0]*21
for round in range(rounds):
for c in range(len(s)):
if config[((c//21)+round)%len(config)][c%21] == 1:
out[(c+round)%21] ^= s[c]
return bytes(out).hex()
```
By examining the sha42() function, we see that each byte of the output is the xor-sum of some subset of the input bytes.
For example,
`out[0] = s[0] xor s[4] xor s[5] xor s[7] xor ...`,
`out[1] = ...`, etc
Since we have a bunch of xor-equations, the idea here is that we can combine these equations and move around the variables, so that we end up with something like:
`s[0] = out[0] xor out[6] xor out[20] xor out[11]`, etc
And we can hence recover each byte of the input `s`.
There are many ways to implement this, but the way I chose to implement my solution was sort of based on the vector-basis-finding algorithm in [this blog](https://codeforces.com/blog/entry/68953). I highly encourage you to read it, but the TL;DR is, given a list of vectors, you can construct another list of vectors that form a "triangle shape" (linearly indepenent) as such:
```python
# warning: pseudocode, not actual python
vectors = ["1100....", "0101....", ...] # list of vectors
new_vectors = list()
for vector in vectors:
for prev_vector in new_vectors:
# if vector contains '1' at leading bit of prev_vector, use xor to cancel it out
if vector[prev_vector.index('1')] == '1':
vector = xor(vector, prev_vector)
new_vectors.append(vector)
new_vectors.sort(lambda x: -x.index('1')) # make sure we xor in the correct order
for line in new_vectors:
print(line) # should give a triangle-shape
```
The idea is to represent each equation in the form of a vector. E.g. `out[0] = s[0] xor s[4] xor s[5]` is represented by the vector `(1, 0, 0, 0, 1, 1, ...)`, where a '1' in position 'i' means that `s[i]` is involved in the xor-equation, and '0' means `s[i]` is not.
We construct the vectors required, then use a similar technique to the one described in the blog above to get a set of linearly independent vectors. (Note: we must also keep track of the "path" to get to each vector.)
For example, if equation 4 is `out[4] = s[2] xor s[3]` and equation 7 is `out[7] = s[2]`, then by combining these equations, we are able to reach the equation `s[3] = out[4] xor out[7]` by taking the path `[4, 7]`.
```python
# outs contains the vector representation of the original equations for different lengths of input s.
outs = [['101100010100101', '000001100101100', '000010111001101', '111001010111010', '000111000001010', '101111101100111', '001110101000011', '001111101111100', '001100111110000', '101101110011100', '001101111110011', '010110000101100', '100100100101010', '111101111001001', '111011000000110', '001000100100111', '010001011010001', '011000010000001', '110100111010000', '010000010011001', '010100111001010'], ['1101100010100101', '1000001100101100', '1000010111001101', '0111001010111010', '1000111000001010', '0101111101100111', '0001110101000011', '1001111101111100', '0001100111110000', '0101101110011100', '1001101111110011', '1010110000101100', '0100100100101010', '1111101111001001', '1111011000000110', '1001000100100111', '0010001011010001', '0011000010000001', '0110100111010000', '1010000010011001', '0010100111001010'], ['01101100010100101', '01000001100101100', '11000010111001101', '10111001010111010', '01000111000001010', '00101111101100111', '00001110101000011', '01001111101111100', '10001100111110000', '10101101110011100', '11001101111110011', '01010110000101100', '00100100100101010', '11111101111001001', '01111011000000110', '11001000100100111', '00010001011010001', '00011000010000001', '00110100111010000', '11010000010011001', '10010100111001010'], ['001101100010100101', '101000001100101100', '111000010111001101', '110111001010111010', '101000111000001010', '000101111101100111', '000001110101000011', '001001111101111100', '110001100111110000', '010101101110011100', '011001101111110011', '101010110000101100', '100100100100101010', '011111101111001001', '101111011000000110', '111001000100100111', '000010001011010001', '000011000010000001', '000110100111010000', '111010000010011001', '010010100111001010'], ['0001101100010100101', '0101000001100101100', '0111000010111001101', '1110111001010111010', '1101000111000001010', '0000101111101100111', '1000001110101000011', '0001001111101111100', '0110001100111110000', '1010101101110011100', '1011001101111110011', '1101010110000101100', '0100100100100101010', '1011111101111001001', '1101111011000000110', '1111001000100100111', '1000010001011010001', '0000011000010000001', '0000110100111010000', '0111010000010011001', '1010010100111001010'], ['10001101100010100101', '10101000001100101100', '10111000010111001101', '01110111001010111010', '11101000111000001010', '10000101111101100111', '01000001110101000011', '10001001111101111100', '10110001100111110000', '11010101101110011100', '11011001101111110011', '11101010110000101100', '10100100100100101010', '01011111101111001001', '11101111011000000110', '01111001000100100111', '11000010001011010001', '00000011000010000001', '10000110100111010000', '10111010000010011001', '01010010100111001010']]
config = [[int(a) for a in n.strip()] for n in open(r"C:\Users\kenne\Downloads\jbox.txt").readlines()]
def xor(x, y):
assert(len(x) == len(y))
return ''.join('1' if p != q else '0' for p, q in zip(x, y))
def process(out):
n = len(out[0]) # number of bits
res = [list() for _ in range(n)]
out = [[bits, [i]] for i, bits in enumerate(out)] # contains vector, path
out.sort(key = lambda x: 1000 if '1' not in x[0] else -x[0].index('1'))
for i in range(len(out)):
bits, path = out[i]
for j in range(i):
if '1' not in out[j][0]: continue
pos = out[j][0].index('1')
if bits[pos] == '1':
bits = xor(bits, out[j][0])
for next_bit in out[j][1]:
if next_bit in path:
path.remove(next_bit)
else:
path.append(next_bit)
out[i] = [bits, path]
out.sort(key = lambda x: -int(x[0], 2)) # sort to make a triangle shape
# note: here, out will look something like this:
# V this is the vector
# ['10000000101010100110', [0, 20, 13]] <- and this is the path to reach this vector
# ['01110100001000111011', [3, 17]]
# ['00110101111101111000', [6, 3, 17]]
# ['00011101011000001011', [13, 6, 17]]
# ['00001101001100011100', [15, 3, 17]]
# ['00000100010001000010', [5, 15, 17, 20, 4, 6]]
# ['00000011000010000001', [17]]
# ['00000001000001100101', [4, 6, 1]]
# ['00000000100000000001', [8, 15, 7, 1, 2, 17, 20]]
# ['00000000010000110010', [9, 0, 13, 5, 4, 6, 8, 7, 1, 2, 17]]
# ... (more lines below)
# as you can see, the vectors now are all linearly independent (in a triangle shape)
# and now its relatively easy to make it to an almost row-echelon form
for i in reversed(range(n)):
bits, path = out[i]
if '1' not in bits: continue
pos = bits.index('1')
for j in range(i):
if out[j][0][pos] == '1':
out[j][0] = xor(bits, out[j][0])
for next_bit in path:
if next_bit in out[j][1]:
out[j][1].remove(next_bit)
else:
out[j][1].append(next_bit)
# now our output looks like this
# ['10000000000000000000', [7, 11, 13, 9, 2, 0, 15, 14, 17, 20, 18, 10, 6, 5]]
# ['01000000000000000010', [7, 8, 13, 5, 17, 2, 10, 6, 4, 18, 14]]
# ['00100000000000000010', [2, 10, 12, 6, 4, 0, 1, 18, 14]]
# ['00010000000000000000', [11, 4, 0, 9, 13, 5, 1, 17, 20, 18, 14]]
# ['00001000000000000010', [3, 2, 13, 5, 1, 17, 10, 20, 18, 12, 14, 6, 8]]
# ['00000100000000000000', [4, 18, 12, 0, 14, 9, 7, 6, 8]]
# ['00000010000000000000', [7, 12, 15, 5, 4, 18, 20, 14]]
# many vectors have 1 single bit, which means we can recover one input byte
# e.g. consider the first row of our output
# ['10000000000000000000', [7, 11, 13, 9, ...]
# -> s[0] = out[7] xor out[11] xor out[13] xor ..., where out is the original output
return out
```
However, notice that some rows of our output consists of 2 '1' bits, like `01000000000000000010` and `00001000000000000010`. This means we cannot uniquely determine some of these values (Here, the values include `s[1], s[5], and s[18]`).
However, we can just brute-force `s[18]`, and check if the resulting input has the correct hash.
Here is the remainder of the solve script:
(Note: I solved separately based on the possible lengths of the input, but this was probably unnecessary.)
```python
outs = [process(outs[i]) for i in range(len(outs))]
next_outs = list()
for out in outs:
res = list()
for bits, path in out:
if '1' not in bits: continue
res.append((bits, path))
next_outs.append(res)
outs = next_outs[::]
outs[4] = outs[4][:-1] + [('0000000000000000000', [])] + outs[4][-1:]
outs[5] = outs[5][:-1] + [('00000000000000000000', [])] + outs[5][-1:]
def solve2(seq, n, ignore_pos, c, out):
res = [0 for _ in range(n)]
res[ignore_pos] = c
for i in reversed(range(n)):
if i == ignore_pos: continue
c = 0
for x in out[i][1]:
if x >= len(seq): continue
c ^= seq[x]
if out[i][0].count('1') > 1:
c ^= res[ignore_pos]
res[i] = c
return res[::-1]
def solve(seq, n):
seq = bytes.fromhex(seq)
# n is guessed length
pos = n - 15
ignore_pos = n - 2
if pos == 4 or pos == 5:
res = list()
for c in printable.encode():
res.append(solve2(seq, n, ignore_pos, c, outs[pos]))
return res
else:
# can uniquely determine the inverse
res = list()
for bits, path in outs[pos]:
c = 0
for x in path:
if x >= len(seq): continue
c ^= seq[x]
res.append(c)
return [bytes(res)[::-1]]
def sha42(s: bytes, rounds=42):
out = [0]*21
for round in range(rounds):
for c in range(len(s)):
if config[((c//21)+round)%len(config)][c%21] == 1:
out[(c+round)%21] ^= s[c]
return bytes(out).hex()
from pwn import *
from string import printable
context.log_level = 'debug'
r = remote('hash.chal.imaginaryctf.org', 1337)
for _ in range(50):
r.recvuntil(b'sha42(password) = ')
seq = r.recvline().strip().decode()
res = list()
for i in range(15, 21):
for test in solve(seq, i):
res.append(test)
res = [row for row in res if all(c in printable.encode() for c in row) and sha42(row) == seq]
r.sendline(bytes(res[0]).hex().encode())
```