contributed by <naan1>
static inline float fabsf(float x) {
uint32_t i = *(uint32_t *)&x;
i &= 0x7FFFFFFF;
x = *(float *)&i;
return x;
}
.data
test1: .word 0xC0600000 # -3.5 in IEEE-754 single point format
expected1: .word 0x40600000 # 3.5, expected result after fabsf
equal_msg: .string "Values are equal\n"
notequal_msg: .string "Values are not equal\n"
.text
fabsf:
la t0, test1
lw a0, 0(t0)
li t3, 0x7FFFFFFF
and a0, a0, t3
la t1, expected1
lw t1, 0(t1)
beq a0, t1, values_equal # If equal, branch to values_equal
values_equal:
# Print "Values are equal"
la a0, equal_msg
li a7, 4
ecall
end_program:
# Exit the program
li a7, 10
ecall
static inline int my_clz(uint32_t x) {
int count = 0;
for (int i = 31; i >= 0; --i) {
if (x & (1U << i))
break;
count++;
}
return count;
}
.data
test_data: .word 0x0000F000
expected: .word 16
equal_msg: .string "\nValues are equal\n"
notequal_msg: .string "\nValues are not equal\n"
.text
my_clz:
la t0, test_data
lw a0, 0(t0)
addi sp, sp, -8
sw s0, 0(sp)
sw s1, 4(sp)
li s0, 0
li s1, 31
clz_loop:
blt s1, zero, clz_end
li t0, 1
sll t0, t0, s1
and t0, a0, t0
bne t0, zero, clz_end
addi s0, s0, 1
addi s1, s1, -1
j clz_loop
clz_end:
mv a0, s0
la t1, expected
lw t2, 0(t1)
beq a0, t2, values_equal
la a0, notequal_msg
li a7, 4
ecall
j end_program
values_equal:
# Print "Values are equal"
la a0, equal_msg
li a7, 4
ecall
end_program:
li a7, 10
ecall
static inline uint32_t fp16_to_fp32(uint16_t h) {
const uint32_t w = (uint32_t) h << 16;
const uint32_t sign = w & UINT32_C(0x80000000);
const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF);
uint32_t renorm_shift = my_clz(nonsign);
renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0;
const int32_t inf_nan_mask = ((int32_t)(nonsign + 0x04000000) >> 8) &
INT32_C(0x7F800000);
const int32_t zero_mask = (int32_t)(nonsign - 1) >> 31;
return sign | ((((nonsign << renorm_shift >> 3) +
((0x70 - renorm_shift) << 23)) | inf_nan_mask) & ~zero_mask);
}
.data
testdata: .word 0x3C00
expect: .word 0x3F800000
equal_msg: .string "value equal\n"
unequal_msg: .string "value not equal\n"
.text
.globl main
main:
# Load testdata and expect
la t0, testdata # Load address of testdata
lw t0, 0(t0) # Load testdata into t0
la s6, expect
lw s5, 0(s6)
slli t0, t0, 16 # Shift left 16 bits to align with 32-bit float
li t1, 0x80000000 # Load the sign mask (0x80000000)
and t2, t0, t1 # Extract sign bit (t2 = sign)
li t1, 0x7FFFFFFF # Load nonsign mask (0x7FFFFFFF)
and t3, t0, t1 # Extract nonsign part (t3 = nonsign)
# Function to count leading zeros in a 32-bit value
my_clz:
li s0, 0
li t0, 31
clz_loop:
blt t0, zero, clz_end
srl t1, t3, t0
andi t1, t1, 1
bnez t1, clz_end
addi s0, s0, 1 # Increment count
addi t0, t0, -1 # Decrement index
j clz_loop # Continue loop
clz_end:
li t1, 5
blt s0, t1, no_adjust # If renorm_shift < 5, skip adjustment
sub s0, s0, t1 # s0 = renorm_shift = renorm_shift - 5
j apply_shift # Jump to apply_shift
no_adjust:
li s0, 0 # renorm_shift = 0
apply_shift:
li t5, 0x04000000
add t5, t5, t3 # nonsign + 0x04000000
srli t5, t5, 8
li t6, 0x7F800000 # Load inf_nan mask
and t5, t5, t6 # inf_nan_mask=t5 = ((nonsign + 0x04000000) >> 8) & 0x7F800000
# Compute zero_mask
addi t6, t3, -1 # nonsign - 1
srai t6, t6, 31 # zero_mask = t6 = (nonsign - 1) >> 31
sll t3, t3, s0 # nonsign << renorm_shift
srli t3, t3, 3 # nonsign >> 3
li t1, 0x70 # Load constant 0x70
sub t1, t1, s0 # 0x70 - renorm_shift
slli t1, t1, 23
add t3, t3, t1
or t3, t3, t5 # Add inf_nan_mask
not t6, t6 # ~zero_mask
and t3, t3, t6 # &~zero_mask
or s4, t2, t3 #
beq s4, s5, equal
j unequal
equal:
la a0, equal_msg
li a7, 4
ecall
j end
unequal:
la a0, unequal_msg
li a7, 4
ecall
end:
li a7, 10
ecall
Gray code is widely used for error detection and correction in digital communication, data storage systems or k-map. It represents a way of sequencing binary numbers such that adjacent values vary by only one binary digit. And there are some ways to generate gray codes, thus I choose this problem as a practice to compare two of methods and practice the bitwise operation from quiz.
For the 89. Gray Code problem from LeetCode, the objective is to generate a sequence of integers that represents a Gray code, where each successive value in the sequence differs by exactly one bit from the previous value.
The sequence of a Gray code for
n
bits follows a specific pattern:
- For a 1-bit Gray code, the sequence starts with 0 and 1.
- For a 2-bit Gray code, the sequence builds upon the 1-bit sequence, resulting in
00
,01
,11
, and10
.- For a 3-bit Gray code, it extends the 2-bit sequence by reflecting the previous sequence and adding a leading 1 to each reflected element.
The fundamental idea is that each integer in the sequence should only differ by one binary digit from its adjacent integers, ensuring a minimal transition state. The structure of Gray code can be recursively generated or constructed using specific formulas that guarantee these properties.
I think that there are 2 methods to generate the gray codes
Recursive way:
This code first generates an n-1 bit Gray Code recursively. Then, it reflects the current generated result and adds 1 to the most significant bit of the reflected part. This expands the smaller Gray Code result into a full n-bit Gray Code.
Bitset
Using bitwise operations to generate Gray Code: for each i, it performs an XOR operation between i and the result of i shifted right by one bit. This directly calculates the Gray Code representation of i.
Input | Output |
---|---|
n = 1 | [0,1] |
n = 2 | [0,1,3,2] |
n = 3 | [0,1,3,2,6,7,5,4] |
#include <stdio.h>
#include <stdio.h>
void generateGrayCode(int n, int* result) {
if (n == 0) {
result[0] = 0;
return;
}
generateGrayCode(n - 1, result);
int size = 1 << (n - 1);
for (int i = 0; i < size; i++) {
result[size + i] = result[size - 1 - i] | (1 << (n - 1));
}
}
int main() {
int n = 3;
int size = 1 << n;
int result[size];
generateGrayCode(n, result);
printf("Gray Code for n=%d:\n", n);
for (int i = 0; i < size; i++) {
printf("%d ", result[i]);
}
return 0;
}
#include <stdio.h>
void grayCode(int n) {
int size = 1 << n;
int result[size];
for (int i = 0; i < size; i++) {
result[i] = i ^ (i >> 1);
printf("%d ", result[i]);
}
}
int main() {
printf("Gray Code for n=3:\n");
grayCode(3); // n = 3
return 0;
}
.data
arr: .word 32
index: .word 0
.text
main:
la t0, index
li t1, 0
sw t1, 0(t0)
la t2, arr
# Prepare arguments for generateGray(3, 0, arr, &index)
li a0, 3
li a1, 0
mv a2, t2
la a3, index
# Call the recursive function
jal ra, generateGray
# Exit the program properly
li a7, 10
ecall
generateGray:
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
mv s0, a0
mv s1, a3
beqz s0, generateGray_base_case
addi a0, s0, -1
mv a1, a1
mv a2, a2
mv a3, s1
jal ra, generateGray # First recursive call
li t0, 1
addi t1, s0, -1
sll t0, t0, t1
xor a1, a1, t0
addi a0, s0, -1
mv a2, a2
mv a3, s1
jal ra, generateGray
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
generateGray_base_case:
lw t0, 0(s1)
slli t1, t0, 2
add t2, a2, t1
sw a1, 0(t2)
addi t0, t0, 1
sw t0, 0(s1)
mv a0, a1
li a7, 1
ecall
li a0, 10
li a7, 11
ecall
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
.data
gray_codes: .word 0
num: .word 8
.text
.globl _start
_start:
la t0, gray_codes
la t1, num
lw t2, 0(t1)
li t3, 0
loop:
bge t3, t2, end
mv t4, t3
srai t5, t3, 1
xor t6, t4, t5
sw t6, 0(t0)
addi t0, t0, 4
addi t3, t3, 1
j loop
end:
li a7, 10
ecall
Implementation | Cycles | Instrs. retired | CPI (Cycles Per Instruction) | IPC (Instructions Per Cycle) | Clock rate |
---|---|---|---|---|---|
Recursive | 488 | 351 | 1.39 | 0.719 | 9.09 Hz |
Bitwise | 97 | 73 | 1.33 | 0.753 | 11.11 Hz |
Here we take the srai command as an example
srai x30 x18 1
is 0x00000020
(PC currently points to the address).0x00000024
)0x401e5f13
, which is the encoded form of the srai x30, x28, 1
instruction.0x401e5f13
is decoded, extracting the opcode (0x01 for srai), source register (x28), destination register (x30).0x00000001
.
ALU takes in two operands: the value from register x28
(currently shown as 0x00000000
) and the immediate value 1 (0x00000001).
The ALU will perform srai
operation on x28
by 1 bit.
Because this instruction does not save the value back to memory, it will not be used. Instead, the value is sent to the next stage through the pipeline.
Finally we will write the value 0x00000000
back to the target register