Assignment1: RISC-V Assembly and Instruction Pipeline

# Assignment1: RISC-V Assembly and Instruction Pipeline contributed by < [`蔡承遠`](https://github.com/Meowwww5/computer_arch_2024_HW1) > ## Quiz1 problem B ### C program ```c #include "stdio.h" #include "stdint.h" typedef union { uint16_t bits; }bf16_t; float test[3] = {3.14159, 1.618, -123.456}; bf16_t test_bf16_output[3] = {0x0}; float test_output[3] = {0}; static inline float bf16_to_fp32(bf16_t h) { union { float f; uint32_t i; } u = {.i = (uint32_t)h.bits << 16}; return u.f; } static inline bf16_t fp32_to_bf16(float s) { bf16_t h; union { float f; uint32_t i; } u = {.f = s}; if ((u.i & 0x7fffffff) > 0x7f800000) { /* NaN */ h.bits = (u.i >> 16) | 64; /* force to quiet */ return h; } h.bits = (u.i + (0x7fff + ((u.i >> 0x10) & 1))) >> 0x10; return h; } int main(){ for (int i = 0; i < 3; i++) { test_bf16_output[i] = fp32_to_bf16(test[i]); test_output[i] = bf16_to_fp32(test_bf16_output[i]); printf("%f, 0x%.8X, %f \n", test[i], test_bf16_output[i].bits, test_output[i]); } return 0; } ``` ### Assembly ```c .data test: .word 0x40490FD0 # 3.14159 .word 0x3FCF1AA0 # 1.618 .word 0xC2F6E979 # -123.456 test_bf16_output: .half 0x0 # converted bfloat16 values .half 0x0 .half 0x0 test_output: .word 0x0 # converted float32 recovered values .word 0x0 .word 0x0 .text .global main main: # Initialize loop counter la s0, test la s1, test_bf16_output la s2, test_output li s3, 0 # i = 0 li s4, 3 # num = 3 (size of the test_data array) li s5, 0xffff0000 loop: bge s3, s4, end_loop # if i >= num, exit loop lw a0, 0(s0) # Load float value (FP32) into a0 li a7, 2 ecall jal ra, fp32_to_bf16 # Call function li a7, 34 ecall jal ra, bf16_to_fp32 li a7, 2 ecall # Load test_data[i+1] addi s0, s0, 4 # Address of test_data addi s1, s1, 2 # Address of test_bf16_output addi s2, s2, 4 # Address of expected_fp32 j next_iteration fp32_to_bf16: li t0, 0x7fffffff li t6, 0 and t1, t6, t0 li t0, 0x7f800000 bgt t1, t0, handle_nan _1: li t0, 0x7fff srli t1, a0, 16 andi t1, t1, 1 add t1, t1, t0 add t1, t1, a0 srli t1, t1, 16 mv a0, t1 sh a0, (0)s1 ret handle_nan: srli t1, a0, 16 ori t1, t1, 64 mv a0, t1 j _1 bf16_to_fp32: # a0 contains the input bf16 (h.bits) slli t0, a0, 16 mv a0, t0 sw a0, (0)s2 ret next_iteration: addi s3, s3, 1 # i++ j loop # Go to the next iteration end_loop: li a7, 10 li a0, 0 ecall ``` ## Third Maximum Number ([LeetCode 414](https://leetcode.com/problems/third-maximum-number/description/)) > Description: > Given an integer array nums, return the third distinct maximum number in this array. If the third maximum does not exist, return the maximum number. Constraints: >* 1 <= nums.length <= 104 >* -2^31 <= nums[i] <= 2^31 - 1 ### Idea ### C program #### First version ```c #include "stdio.h" int test_data2[2] = {1, 2}; int test_data1[3] = { 3, 2, 1 }; int test_data3[4] = { 2, 2, 3, 1 }; int thirdMax(int* nums, int numsize) { int max1 = 0, max2 = 0, max3 = 0, out = 0; for (int i = 0; i < numsize; i++){ if (nums[i] != max1 && nums[i] != max2 && nums[i] != max3) { if (nums[i] > max1){ max3 = max2; max2 = max1; max1 = nums[i]; } else if(nums[i] > max2){ max3 = max2; max2 = nums[i]; } else if (nums[i] > max3) { max3 = nums[i]; } } } if (numsize >= 3 ){ out = max3; } else{ out = max1; } return out; } int main() { int numsize, result; for (int i = 0; i < 3; i++){ if (i==0){ numsize = sizeof(test_data1) / 4; result = thirdMax(&test_data1, numsize); printf("%d \n", result); } if (i == 1) { numsize = sizeof(test_data2) / 4; result = thirdMax(&test_data2, numsize); printf("%d \n", result); } if (i == 2) { numsize = sizeof(test_data3) / 4; result = thirdMax(&test_data3, numsize); printf("%d \n", result); } } return 0; } ``` ### Assembly ```c .data test_data1: .word 3, 2, 1 test_data2: .word 1, 2 test_data3: .word 2, 2, 3, 1 nums_len: .word 3, 2, 4 # Length of the test data 1, 2 and 3 .text .globl main main: addi s2, s2, 3 la s0, test_data1 la s1, nums_len _1: beqz s2, end jal ra, load_test_data li a7, 1 # once get result, output ecall addi s2, s2, -1 addi s0, s3, 0 addi s1, s1, 4 j _1 load_test_data: mv a0, s0 # Load address of nums into a0 lw a1, (0)s1 # Load length of the array into a1 li t0, 0 # Initialize distinct count li t1, 0 # To keep track of max1 li t2, 0 li t3, 0 find_max: beqz a1, check_result # If a1 (length) is 0, go to check result # Increment distinct count addi t0, t0, 1 lw t4, 0(a0) # Load current number into t4 addi a0, a0, 4 # Move to the next element addi a1, a1, -1 # Decrement the length # Check if t4 is a new distinct maximum beq t4, t1, find_max # If t4 is max1, skip beq t4, t2, find_max beq t4, t3, find_max # Update the distinct max variables bge t4, t1, max_1 bge t4, t2, max_2 bge t4, t3, max_3 j find_max # Repeat for the next element max_1: mv t3, t2 mv t2, t1 mv t1, t4 j find_max max_2: mv t3, t2 mv t2, t4 j find_max max_3: mv t3, t4 j find_max check_result: # Check distinct count li t5, 3 bge t0, t5, return_max3 # If > 3 distinct max, return max3 # If there are <3 distinct max values, return max mv s3, a0 mv a0, t1 ret return_max3: mv s3, a0 mv a0, t3 ret end: # Exit program li a7, 10 # Exit syscall ecall ```