# Assignment1: RISC-V Assembly and Instruction Pipeline
Contributed by < [dingsen-Greenhorn](https://github.com/dingsen-Greenhorn) >
## uf8 implements a logarithmic 8-bit codec
---
### decoder
#### c_code
```clike
/*
 * Decode uf8 to uint32_t.
 *
 * The high nibble of fl is the exponent and the low nibble is the
 * mantissa. The result is the mantissa scaled by 2^exponent plus a
 * per-exponent base of ((1 << exponent) - 1) * 16.
 */
uint32_t uf8_decode(uf8 fl)
{
    const uint8_t exp = fl >> 4;
    const uint32_t frac = fl & 0x0f;
    /* 0x7FFF >> (15 - exp) leaves exactly exp one-bits, i.e. (1 << exp) - 1. */
    const uint32_t base = (0x7FFF >> (15 - exp)) << 4;

    return base + (frac << exp);
}
```
#### assembly
```riscv
uf8_decode:
    # Decode a uf8 byte to its 32-bit value.
    #   in : a0 = encoded byte (bits 7:4 = exponent, bits 3:0 = mantissa)
    #   out: a0 = ((1 << exponent) * 16 - 16) + (mantissa << exponent)
    #   clobbers: t0-t3
    srli t0, a0, 4          # t0 = exponent
    andi t1, a0, 0xF        # t1 = mantissa
    # overflow = (1 << exponent) * 16 - 16
    # The exponent is used as-is: incrementing it first would double both
    # the offset and the mantissa scale (0x05 would decode to 26, not 5).
    li   t2, 1
    sll  t2, t2, t0         # 1 << exponent
    slli t2, t2, 4          # * 16
    addi t2, t2, -16        # - 16
    # decoded = overflow + (mantissa << exponent)
    sll  t3, t1, t0
    add  a0, t2, t3
    ret
```
### encoder
```clike
/*
 * Encode uint32_t to uf8.
 *
 * The result holds a 4-bit exponent e in the high nibble and a 4-bit
 * mantissa m in the low nibble, with
 *     overflow(e) = 16 * ((1 << e) - 1)
 *     m           = (value - overflow(e)) >> e
 * so the encoded point overflow(e) + (m << e) never exceeds value.
 *
 * Values below 16 are returned unchanged. Values larger than the biggest
 * representable point (e = 15, m = 15) saturate to that code; without the
 * clamp the oversized mantissa would be truncated and OR-ed into the
 * exponent nibble, making the encoding wrap around for large inputs.
 */
uf8 uf8_encode(uint32_t value)
{
    /* Small values are their own code (exponent 0). */
    if (value < 16) {
        return value;
    }

    /* Index of the highest set bit, used as a hint for the exponent. */
    int lz = clz(value);
    int msb = 31 - lz;

    /* Start from a good initial guess */
    uint8_t exponent = 0;
    uint32_t overflow = 0;

    if (msb >= 5) {
        /* Estimate exponent - the formula is empirical */
        exponent = msb - 4;
        if (exponent > 15) {
            exponent = 15;
        }

        /* Calculate overflow for estimated exponent */
        for (uint8_t e = 0; e < exponent; e++) {
            overflow = (overflow << 1) + 16;
        }

        /* Adjust if estimate was off */
        while (exponent > 0 && value < overflow) {
            overflow = (overflow - 16) >> 1;
            exponent--;
        }
    }

    /* Find exact exponent: step up while value reaches the next band. */
    while (exponent < 15) {
        uint32_t next_overflow = (overflow << 1) + 16;
        if (value < next_overflow) {
            break;
        }
        overflow = next_overflow;
        exponent++;
    }

    /* Only 4 mantissa bits exist; saturate instead of spilling upward. */
    uint32_t mantissa = (value - overflow) >> exponent;
    if (mantissa > 15) {
        mantissa = 15;
    }

    return (exponent << 4) | mantissa;
}
```
#### clz
```clike
/*
 * Count leading zero bits of a 32-bit word by binary search.
 *
 * Each pass tests whether the upper part of the remaining window is
 * non-zero; if so the window is narrowed to that part and the count of
 * still-possible leading zeros shrinks by the step size. Returns 32 for
 * x == 0.
 */
static inline unsigned clz(uint32_t x)
{
    int zeros = 32;

    for (int step = 16; step != 0; step >>= 1) {
        uint32_t upper = x >> step;
        if (upper != 0) {
            zeros -= step;
            x = upper;
        }
    }

    /* x is now 0 or 1; a remaining 1 accounts for the final bit. */
    return zeros - x;
}
```
#### assembly
```riscv=
#############################################
# UF8 encode (RV32I-only)
#
#   in : a0 = unsigned 32-bit value
#   out: a0 = (exponent << 4) | mantissa
#   clobbers: t0-t6 only (no callee-saved register is touched)
#
# Values below 16 are returned unchanged. The mantissa saturates at 15,
# so inputs above the largest representable value encode as 0xFF.
#############################################
uf8_encode:
    li   t0, 16
    bltu a0, t0, encode_small      # unsigned: value >= 2^31 is not "small"

    # Compute MSB index (software): t2 = floor(log2(value))
    addi t1, a0, 0
    li   t2, 0
    li   t3, 1
msb_loop:
    bleu t1, t3, msb_done          # stop once only the top bit is left
    srli t1, t1, 1
    addi t2, t2, 1
    j    msb_loop
msb_done:
    # First guess: exponent = min(msb - 4, 15).
    # msb >= 4 here because value >= 16, so no lower clamp is needed.
    addi t3, t2, -4
    li   t4, 15
    ble  t3, t4, exp_set
    li   t3, 15
exp_set:
    # overflow = (1 << exponent) * 16 - 16
    li   t4, 1
    sll  t6, t4, t3
    addi t6, t6, -1
    slli t4, t6, 4                 # t4 = overflow

    # Find exact exponent: the guess can be too low (e.g. 240 has msb 7,
    # guess 3, but 240 is already the offset of exponent 4). Step up while
    # value >= next_overflow and exponent < 15.
    li   t0, 15
uf8_enc_refine:
    bge  t3, t0, uf8_enc_pack
    slli t6, t4, 1
    addi t6, t6, 16                # next_overflow = (overflow << 1) + 16
    bltu a0, t6, uf8_enc_pack
    addi t4, t6, 0                 # overflow = next_overflow
    addi t3, t3, 1                 # exponent++
    j    uf8_enc_refine

uf8_enc_pack:
    # mantissa = (value - overflow) >> exponent, saturated to 4 bits
    sub  t5, a0, t4
    srl  t5, t5, t3
    bleu t5, t0, uf8_enc_mant_ok   # t0 still holds 15
    li   t5, 15
uf8_enc_mant_ok:
    # result = (exponent << 4) | mantissa
    slli t3, t3, 4
    or   a0, t3, t5
    ret
encode_small:
    ret
```
### test result
#### C code after encoder and decoder compress to uf8

The greatest error upper bound is 6.25%.
#### assembly after encoder and decoder to uf8
Because of computation limits, we pick 5 values that are close to where the error phenomenon appears.
We pick 230, 240, 245, 250, 260,
which in hex are 0xE6, 0xF0, 0xF5, 0xFA, 0x104.
- 
##### The decoder answer after uf8 encoder
```riscv
Decoded: 240
Program exited with code: 0
Decoded: 256
Program exited with code: 0
```
which we can see that for 240 to 256
there are all 49
## Matrix multiplication using bfloat16