# Assignment1: RISC-V Assembly and Instruction Pipeline
contributed by < [`蔡承遠`](https://github.com/Meowwww5/computer_arch_2024_HW1) >
## Quiz1 problem B
### C program
```c
#include "stdio.h"
#include "stdint.h"
typedef union
{
uint16_t bits;
}bf16_t;
float test[3] = {3.14159, 1.618, -123.456};
bf16_t test_bf16_output[3] = {0x0};
float test_output[3] = {0};
static inline float bf16_to_fp32(bf16_t h)
{
union {
float f;
uint32_t i;
} u = {.i = (uint32_t)h.bits << 16};
return u.f;
}
static inline bf16_t fp32_to_bf16(float s)
{
bf16_t h;
union {
float f;
uint32_t i;
} u = {.f = s};
if ((u.i & 0x7fffffff) > 0x7f800000) { /* NaN */
h.bits = (u.i >> 16) | 64; /* force to quiet */
return h;
}
h.bits = (u.i + (0x7fff + ((u.i >> 0x10) & 1))) >> 0x10;
return h;
}
int main(){
for (int i = 0; i < 3; i++)
{
test_bf16_output[i] = fp32_to_bf16(test[i]);
test_output[i] = bf16_to_fp32(test_bf16_output[i]);
printf("%f, 0x%.8X, %f \n", test[i], test_bf16_output[i].bits, test_output[i]);
}
return 0;
}
```
### Assembly
```c
.data
test:
.word 0x40490FD0 # 3.14159
.word 0x3FCF1AA0 # 1.618
.word 0xC2F6E979 # -123.456
test_bf16_output:
.half 0x0 # converted bfloat16 values
.half 0x0
.half 0x0
test_output:
.word 0x0 # converted float32 recovered values
.word 0x0
.word 0x0
.text
.global main
main:
# Initialize loop counter
la s0, test
la s1, test_bf16_output
la s2, test_output
li s3, 0 # i = 0
li s4, 3 # num = 3 (size of the test_data array)
li s5, 0xffff0000
loop:
bge s3, s4, end_loop # if i >= num, exit loop
lw a0, 0(s0) # Load float value (FP32) into a0
li a7, 2
ecall
jal ra, fp32_to_bf16 # Call function
li a7, 34
ecall
jal ra, bf16_to_fp32
li a7, 2
ecall
# Load test_data[i+1]
addi s0, s0, 4 # Address of test_data
addi s1, s1, 2 # Address of test_bf16_output
addi s2, s2, 4 # Address of expected_fp32
j next_iteration
fp32_to_bf16:
li t0, 0x7fffffff
li t6, 0
and t1, t6, t0
li t0, 0x7f800000
bgt t1, t0, handle_nan
_1:
li t0, 0x7fff
srli t1, a0, 16
andi t1, t1, 1
add t1, t1, t0
add t1, t1, a0
srli t1, t1, 16
mv a0, t1
sh a0, (0)s1
ret
handle_nan:
srli t1, a0, 16
ori t1, t1, 64
mv a0, t1
j _1
bf16_to_fp32:
# a0 contains the input bf16 (h.bits)
slli t0, a0, 16
mv a0, t0
sw a0, (0)s2
ret
next_iteration:
addi s3, s3, 1 # i++
j loop # Go to the next iteration
end_loop:
li a7, 10
li a0, 0
ecall
```
## Third Maximum Number ([LeetCode 414](https://leetcode.com/problems/third-maximum-number/description/))
> Description:
> Given an integer array nums, return the third distinct maximum number in this array. If the third maximum does not exist, return the maximum number.
Constraints:
>* 1 <= nums.length <= 104
>* -2^31 <= nums[i] <= 2^31 - 1
### Idea
### C program
#### First version
```c
#include "stdio.h"
int test_data2[2] = {1, 2};
int test_data1[3] = { 3, 2, 1 };
int test_data3[4] = { 2, 2, 3, 1 };
int thirdMax(int* nums, int numsize) {
int max1 = 0, max2 = 0, max3 = 0, out = 0;
for (int i = 0; i < numsize; i++){
if (nums[i] != max1 && nums[i] != max2 && nums[i] != max3) {
if (nums[i] > max1){
max3 = max2;
max2 = max1;
max1 = nums[i];
}
else if(nums[i] > max2){
max3 = max2;
max2 = nums[i];
}
else if (nums[i] > max3) {
max3 = nums[i];
}
}
}
if (numsize >= 3 ){
out = max3;
}
else{
out = max1;
}
return out;
}
int main() {
int numsize, result;
for (int i = 0; i < 3; i++){
if (i==0){
numsize = sizeof(test_data1) / 4;
result = thirdMax(&test_data1, numsize);
printf("%d \n", result);
}
if (i == 1) {
numsize = sizeof(test_data2) / 4;
result = thirdMax(&test_data2, numsize);
printf("%d \n", result);
}
if (i == 2) {
numsize = sizeof(test_data3) / 4;
result = thirdMax(&test_data3, numsize);
printf("%d \n", result);
}
}
return 0;
}
```
### Assembly
```c
.data
test_data1: .word 3, 2, 1
test_data2: .word 1, 2
test_data3: .word 2, 2, 3, 1
nums_len: .word 3, 2, 4 # Length of the test data 1, 2 and 3
.text
.globl main
main:
addi s2, s2, 3
la s0, test_data1
la s1, nums_len
_1:
beqz s2, end
jal ra, load_test_data
li a7, 1 # once get result, output
ecall
addi s2, s2, -1
addi s0, s3, 0
addi s1, s1, 4
j _1
load_test_data:
mv a0, s0 # Load address of nums into a0
lw a1, (0)s1 # Load length of the array into a1
li t0, 0 # Initialize distinct count
li t1, 0 # To keep track of max1
li t2, 0
li t3, 0
find_max:
beqz a1, check_result # If a1 (length) is 0, go to check result
# Increment distinct count
addi t0, t0, 1
lw t4, 0(a0) # Load current number into t4
addi a0, a0, 4 # Move to the next element
addi a1, a1, -1 # Decrement the length
# Check if t4 is a new distinct maximum
beq t4, t1, find_max # If t4 is max1, skip
beq t4, t2, find_max
beq t4, t3, find_max
# Update the distinct max variables
bge t4, t1, max_1
bge t4, t2, max_2
bge t4, t3, max_3
j find_max # Repeat for the next element
max_1:
mv t3, t2
mv t2, t1
mv t1, t4
j find_max
max_2:
mv t3, t2
mv t2, t4
j find_max
max_3:
mv t3, t4
j find_max
check_result:
# Check distinct count
li t5, 3
bge t0, t5, return_max3 # If > 3 distinct max, return max3
# If there are <3 distinct max values, return max
mv s3, a0
mv a0, t1
ret
return_max3:
mv s3, a0
mv a0, t3
ret
end:
# Exit program
li a7, 10 # Exit syscall
ecall
```