owned this note
owned this note
Published
Linked with GitHub
---
tags: Computer Architecture (2022 Fall)
---
# Homework3: SoftCPU
## c code (from hw2)
```c=
#include<stdio.h>
/*
 * Return the highest altitude reached on a trip that starts at altitude 0.
 *
 * gain     - net altitude change of each leg of the trip
 * gainSize - number of entries in gain
 *
 * The starting altitude (0) counts, so the result is never negative.
 */
int largestAltitude(int* gain, int gainSize){
    int altitude = 0;   /* running sum of the gains seen so far */
    int highest = 0;    /* best altitude seen so far, including the start */
    int idx = 0;

    while (idx < gainSize) {
        altitude += gain[idx++];
        highest = (altitude > highest) ? altitude : highest;
    }
    return highest;
}
/* Run largestAltitude on three sample gain arrays and print each result. */
int main(){
    int first[]  = {-1,5,4};
    int second[] = {-5,1,5,0,-7};
    int third[]  = {-5,1,3};

    int best = largestAltitude(first, (int)(sizeof first / sizeof first[0]));
    printf("max1=%d\n",best);

    best = largestAltitude(second, (int)(sizeof second / sizeof second[0]));
    printf("max2=%d\n",best);

    best = largestAltitude(third, (int)(sizeof third / sizeof third[0]));
    printf("max3=%d\n",best);

    return 0;
}
```
### makefile
```makefile=
# Build hw3.c into hw3.elf and derive the memory images, disassembly and
# symbol dump from it.
# CC, OBJCOPY, OBJDUMP and READELF are not defined here; presumably they come
# from ../common/Makefile.common -- confirm there.
include ../common/Makefile.common

EXE      = .elf
SRC      = hw3.c
CFLAGS  += -L../common
LDFLAGS += -T ../common/default.ld
TARGET   = hw3
OUTPUT   = $(TARGET)$(EXE)

# $(TARGET) is only a name for "build everything": the recipe writes $(OUTPUT)
# and the .bin/.dis/.symbol files, never a file called $(TARGET). Declaring it
# phony keeps a stray file of that name from silently skipping the build.
.PHONY: all clean $(TARGET)

all: $(TARGET)

# Every recipe line below must start with a TAB character.
$(TARGET): $(SRC)
	$(CC) $(CFLAGS) -o $(OUTPUT) $(SRC) $(LDFLAGS)
	$(OBJCOPY) -j .text -O binary $(OUTPUT) imem.bin
	$(OBJCOPY) -j .data -O binary $(OUTPUT) dmem.bin
	$(OBJCOPY) -O binary $(OUTPUT) memory.bin
	$(OBJDUMP) -d $(OUTPUT) > $(TARGET).dis
	$(READELF) -a $(OUTPUT) > $(TARGET).symbol

clean:
	$(RM) *.o $(OUTPUT) $(TARGET).dis $(TARGET).symbol [id]mem.bin memory.bin
```
### compile into assembly code
```s=
.file "hw3.c"
.option nopic
.attribute arch, "rv32i2p0_m2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 2
.globl largestAltitude
.type largestAltitude, @function
# int largestAltitude(int *gain, int gainSize) -- as emitted by the compiler.
# In:  a0 = gain, a1 = gainSize          Out: a0 = max
# Every local is kept in the stack frame and reloaded before each use.
# Frame (48 bytes, s0 = value of sp on entry):
#   -20(s0) i    -24(s0) max    -28(s0) arti    -36(s0) gain    -40(s0) gainSize
largestAltitude:
addi sp,sp,-48 # allocate the 48-byte frame
sw s0,44(sp) # save the caller's s0
addi s0,sp,48 # s0 = frame base (sp on entry)
sw a0,-36(s0) # spill gain
sw a1,-40(s0) # spill gainSize
sw zero,-28(s0) # arti = 0
lw a5,-28(s0)
sw a5,-24(s0) # max = arti
sw zero,-20(s0) # i = 0
j .L2 # enter the loop at its condition
# loop body
.L4:
lw a5,-20(s0)
slli a5,a5,2 # a5 = i * 4 (byte offset of gain[i])
lw a4,-36(s0)
add a5,a4,a5 # a5 = &gain[i]
lw a5,0(a5) # a5 = gain[i]
lw a4,-28(s0)
add a5,a4,a5
sw a5,-28(s0) # arti = arti + gain[i]
lw a4,-28(s0)
lw a5,-24(s0)
ble a4,a5,.L3 # skip the update unless arti > max
lw a5,-28(s0)
sw a5,-24(s0) # max = arti
.L3:
lw a5,-20(s0)
addi a5,a5,1
sw a5,-20(s0) # i++
# loop condition
.L2:
lw a4,-20(s0)
lw a5,-40(s0)
blt a4,a5,.L4 # repeat while i < gainSize
lw a5,-24(s0)
mv a0,a5 # return max
lw s0,44(sp) # restore s0
addi sp,sp,48 # release the frame
jr ra
.size largestAltitude, .-largestAltitude
# Read-only constants used by main: the three printf formats and the initial
# contents of gain2.
.section .rodata
.align 2
.LC1:
.string "max1=%d\n"
.align 2
.LC2:
.string "max2=%d\n"
.align 2
.LC3:
.string "max3=%d\n"
.align 2
# initialiser of gain2 = {-5,1,5,0,-7}; copied onto main's stack before use
.LC0:
.word -5
.word 1
.word 5
.word 0
.word -7
.text
.align 2
.globl main
.type main, @function
# int main(void) -- as emitted by the compiler.
# Builds three arrays on the stack, calls largestAltitude on each and prints
# the result with printf. Returns 0 in a0.
# Frame (80 bytes, s0 = value of sp on entry):
#   -20(s0) gainSize          -24(s0) max
#   -36..-28(s0) gain[3]      -56..-40(s0) gain2[5]      -68..-60(s0) gain3[3]
main:
addi sp,sp,-80 # allocate the 80-byte frame
sw ra,76(sp) # save return address (main makes calls)
sw s0,72(sp) # save the caller's s0
addi s0,sp,80 # s0 = frame base
# --- gain = {-1,5,4}, gainSize = 3 ---
li a5,-1
sw a5,-36(s0) # gain[0] = -1
li a5,5
sw a5,-32(s0) # gain[1] = 5
li a5,4
sw a5,-28(s0) # gain[2] = 4
li a5,3
sw a5,-20(s0) # gainSize = 3
addi a5,s0,-36 # a5 = &gain[0]
lw a1,-20(s0) # arg1 = gainSize
mv a0,a5 # arg0 = gain
call largestAltitude
sw a0,-24(s0) # max = return value
lw a1,-24(s0) # printf arg1 = max
lui a5,%hi(.LC1)
addi a0,a5,%lo(.LC1) # printf arg0 = "max1=%d\n"
call printf
# --- gain2: copy the five words of .LC0 onto the stack, gainSize = 5 ---
lui a5,%hi(.LC0)
addi a5,a5,%lo(.LC0) # a5 = &.LC0
lw a1,0(a5)
lw a2,4(a5)
lw a3,8(a5)
lw a4,12(a5)
lw a5,16(a5) # last load overwrites the base pointer
sw a1,-56(s0) # gain2[0]
sw a2,-52(s0) # gain2[1]
sw a3,-48(s0) # gain2[2]
sw a4,-44(s0) # gain2[3]
sw a5,-40(s0) # gain2[4]
li a5,5
sw a5,-20(s0) # gainSize = 5
addi a5,s0,-56 # a5 = &gain2[0]
lw a1,-20(s0) # arg1 = gainSize
mv a0,a5 # arg0 = gain2
call largestAltitude
sw a0,-24(s0) # max = return value
lw a1,-24(s0) # printf arg1 = max
lui a5,%hi(.LC2)
addi a0,a5,%lo(.LC2) # printf arg0 = "max2=%d\n"
call printf
# --- gain3 = {-5,1,3}, gainSize = 3 ---
li a5,-5
sw a5,-68(s0) # gain3[0] = -5
li a5,1
sw a5,-64(s0) # gain3[1] = 1
li a5,3
sw a5,-60(s0) # gain3[2] = 3
li a5,3
sw a5,-20(s0) # gainSize = 3
addi a5,s0,-68 # a5 = &gain3[0]
lw a1,-20(s0) # arg1 = gainSize
mv a0,a5 # arg0 = gain3
call largestAltitude
sw a0,-24(s0) # max = return value
lw a1,-24(s0) # printf arg1 = max
lui a5,%hi(.LC3)
addi a0,a5,%lo(.LC3) # printf arg0 = "max3=%d\n"
call printf
li a5,0
mv a0,a5 # return 0
lw ra,76(sp) # restore return address
lw s0,72(sp) # restore s0
addi sp,sp,80 # release the frame
jr ra
.size main, .-main
.ident "GCC: (xPack GNU RISC-V Embedded GCC x86_64) 12.2.0"
```
### RTL sim result

### ISS sim result

# Handwritten assembly optimization
```s=
.file "FHA.c"
.option nopic
.attribute arch, "rv32i2p1"
.attribute unaligned_access, 0
.attribute stack_align, 16
# Test data for the handwritten version: three gain arrays, each followed by
# a word holding its element count, plus a printf format string.
.data
gain: .word -1,5,4 # first test array
gainsize: .word 3 # number of elements in gain
gain2: .word -5,1,5,0,-7 # second test array
gainsize2: .word 5 # number of elements in gain2
gain3: .word -5,1,3 # third test array
gainsize3: .word 3 # number of elements in gain3
# printf format string
.LC1:
.string "max1=%d\n"
.align 2
.text
.align 2
.globl func
.type func, @function
# func: largest running sum ("largest altitude") of an int array.
#
# NOTE: this routine uses its own register convention, NOT the standard
# RISC-V calling convention:
#   In:    s0 = address of the first element
#          s1 = number of elements
#   Out:   a1 = max(0, every prefix sum of the array); left in a1 so the
#          caller can hand it to printf as the second argument unchanged
#   Clobb: s0 (advanced by 4 for every element read), t1, a2, a3
#
# The running sum and the maximum stay in registers for the whole loop, so
# the only memory access per element is the load of gain[i].
func:
    mv   t1, x0              # i = 0
    mv   a2, x0              # arti = 0
    mv   a3, x0              # max = 0
    blez s1, func_done       # no elements: result is 0, do not touch memory
loop:
    lw   a1, 0(s0)           # a1 = gain[i]
    addi s0, s0, 4           # advance to the next element
    add  a2, a2 ,a1          # arti = arti + gain[i]
    ble  a2, a3 ,conti       # keep max unless arti > max
    mv   a3, a2              # max = arti
conti:
    addi t1, t1 ,1           # i++
    blt  t1, s1, loop        # repeat while i < gainsize
func_done:
    mv   a1 ,a3              # result goes out in a1 (see header)
    jr   ra
.size func, .-func
# Extra printf formats so that each result is labelled max1/max2/max3, as in
# the C reference program. They live in .data like the existing .LC1 string.
.data
.LC2:
    .string "max2=%d\n"
.LC3:
    .string "max3=%d\n"
    .align 2
.text
.align 2
.globl main
.type main, @function
# int main(void)
# Runs func on gain, gain2 and gain3 and prints each result with printf.
# func takes its array in s0 and its count in s1 and returns the maximum in
# a1, which is exactly where printf expects its second argument; a1 is not
# touched between the return from func and the call to printf.
# s0 and s1 are callee-saved registers, so main saves and restores them.
# Frame (32 bytes): 28(sp) ra, 24(sp) s0, 20(sp) s1.
main:
    addi sp,sp,-32
    sw   ra,28(sp)
    sw   s0,24(sp)           # preserve callee-saved s0 for main's caller
    sw   s1,20(sp)           # preserve callee-saved s1 for main's caller

    la   s0, gain            # s0 = &gain[0]
    lw   s1, gainsize        # s1 = element count
    call func                # a1 = max
    lui  a5,%hi(.LC1)
    addi a0,a5,%lo(.LC1)     # a0 = "max1=%d\n"
    call printf

    la   s0, gain2           # s0 = &gain2[0]
    lw   s1, gainsize2
    call func
    lui  a5,%hi(.LC2)
    addi a0,a5,%lo(.LC2)     # a0 = "max2=%d\n"
    call printf

    la   s0, gain3           # s0 = &gain3[0]
    lw   s1, gainsize3
    call func
    lui  a5,%hi(.LC3)
    addi a0,a5,%lo(.LC3)     # a0 = "max3=%d\n"
    call printf

    addi a0,x0,0             # return 0
    lw   s1,20(sp)
    lw   s0,24(sp)
    lw   ra,28(sp)
    addi sp,sp,32
    jr   ra
.size main, .-main
.ident "GCC: (xPack GNU RISC-V Embedded GCC x86_64) 12.2.0"
```
Obviously, the handwritten assembly has fewer lines than the compiler-generated code, because I removed many unnecessary load/store instructions.
### makefile
```makefile=
# Build the handwritten assembly hw3_opt_hand.s into hw3_opt_hand.elf and
# derive the memory images, disassembly and symbol dump from it.
# CC, OBJCOPY, OBJDUMP and READELF are not defined here; presumably they come
# from ../common/Makefile.common -- confirm there.
include ../common/Makefile.common

EXE        = .elf
ASS        = .s
# SRC and OUTPUT_ASS are not used by any rule in this file; they are kept in
# case ../common/Makefile.common refers to them.
SRC        = hw3.c
CFLAGS    += -L../common
LDFLAGS   += -T ../common/default.ld
TARGET     = hw3_opt_hand
OUTPUT     = $(TARGET)$(EXE)
OUTPUT_ASS = $(TARGET)$(ASS)
SRC_ASS    = hw3_opt_hand.s

# $(TARGET) is only a name for "build everything": the recipe writes $(OUTPUT)
# and the .bin/.dis/.symbol files, never a file called $(TARGET). Declaring it
# phony keeps a stray file of that name from silently skipping the build.
.PHONY: all clean $(TARGET)

all: $(TARGET)

# Listing the source as a prerequisite makes a missing $(SRC_ASS) fail early
# with a clear message. Every recipe line must start with a TAB character.
$(TARGET): $(SRC_ASS)
	$(CC) $(CFLAGS) -o $(OUTPUT) $(SRC_ASS) $(LDFLAGS)
	$(OBJCOPY) -j .text -O binary $(OUTPUT) imem.bin
	$(OBJCOPY) -j .data -O binary $(OUTPUT) dmem.bin
	$(OBJCOPY) -O binary $(OUTPUT) memory.bin
	$(OBJDUMP) -d $(OUTPUT) > $(TARGET).dis
	$(READELF) -a $(OUTPUT) > $(TARGET).symbol

clean:
	$(RM) *.o $(OUTPUT) $(TARGET).dis $(TARGET).symbol [id]mem.bin memory.bin
```
### RTL sim result

### ISS sim result

## Comparison (using code from hw2)
|RTL sim cycle|opt|
|-|-|
|6414|no (only rely on compiler)|
|6201|handwrite assembly|
## control hazard

```
f8: fec42583 lw a1,-20(s0)
fc: 00078513 mv a0,a5
100: f3dff0ef jal ra,3c <largestAltitude>
104: fea42423 sw a0,-24(s0)
108: fe842583 lw a1,-24(s0)
```
When the `jal` is taken, the CPU has already fetched 2 wrong instructions before it jumps to the correct destination PC, so those 2 cycles are flushed. The `wb_nop` and `wb_nop_more` signals are also used to prevent invalid write-back actions.
## data hazard
```
74: 0007a783 lw a5,0(a5) #old a5=x15=0X3FFC8, new a5=dmem[0x3FFC8]=0XFFFFFFFB
78: fe442703 lw a4,-28(s0) # s0=x8=0x3FFB0,a4=dmem[0x3FFB0-28]=dmem[0x3FF94]=0
7c: 00f707b3 add a5,a4,a5 # a5 = 0+0XFFFFFFFB
```

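The execute stage computes the dmem address for the load, the read data (rdata) arrives in the next cycle, and it is then forwarded to the ALU, so the ALU result of the dependent `add` is correct in the following cycle.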
## leetcode (medium)
75. Sort Colors
https://leetcode.com/problems/sort-colors/
### c code
```c=
/*
 * Rewrite nums in place as a run of 0s, then a run of 1s, then 2s.
 *
 * The first pass counts how many entries equal 0 and how many equal 1.
 * The second pass overwrites the array: the first `zeros` slots get 0, the
 * next `ones` slots get 1, and every remaining slot gets 2 (whatever value
 * it held before).
 */
void sortColors(int* nums, int numsSize){
    int zeros = 0;
    int ones = 0;
    int k;

    for (k = 0; k < numsSize; ++k) {
        switch (nums[k]) {
        case 0:
            ++zeros;
            break;
        case 1:
            ++ones;
            break;
        default:
            break;
        }
    }

    int onesEnd = zeros + ones;   /* index one past the last 1 */

    k = 0;
    while (k < zeros)
        nums[k++] = 0;
    while (k < onesEnd)
        nums[k++] = 1;
    while (k < numsSize)
        nums[k++] = 2;
}
```
### assembly code (auto-generated by compiler)
```s=
.file "leetcode.c"
.option nopic
.attribute arch, "rv32i2p0_m2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
# printf format used by sortColors to print one array element per line
.section .rodata
.align 2
.LC2:
.string "%d\n"
.text
.align 2
.globl sortColors
.type sortColors, @function
# void sortColors(int *nums, int numsSize) -- as emitted by the compiler.
# In: a0 = nums, a1 = numsSize. No return value.
# Pass 1 counts the 0s (red) and 1s (white); pass 2 rewrites the array as
# 0s, then 1s, then 2s, and prints each element with printf after writing it.
# Every local is kept in the stack frame and reloaded before each use.
# Frame (48 bytes, s0 = value of sp on entry):
#   -20(s0) red   -24(s0) white   -28(s0) i   -32(s0) red_white
#   -36(s0) nums  -40(s0) numsSize
sortColors:
addi sp,sp,-48 # allocate the 48-byte frame
sw ra,44(sp) # save return address (printf is called below)
sw s0,40(sp) # save the caller's s0
addi s0,sp,48 # s0 = frame base
sw a0,-36(s0) # spill nums
sw a1,-40(s0) # spill numsSize
sw zero,-20(s0) # red = 0
sw zero,-24(s0) # white = 0
sw zero,-28(s0) # i = 0
j .L2 # enter the counting loop at its condition
# counting loop body
.L5:
lw a5,-28(s0)
slli a5,a5,2 # a5 = i * 4
lw a4,-36(s0)
add a5,a4,a5 # a5 = &nums[i]
lw a5,0(a5) # a5 = nums[i]
bne a5,zero,.L3 # not 0: go test for 1
lw a5,-20(s0)
addi a5,a5,1
sw a5,-20(s0) # red++
j .L4
.L3:
lw a5,-28(s0)
slli a5,a5,2
lw a4,-36(s0)
add a5,a4,a5
lw a4,0(a5) # a4 = nums[i] (reloaded)
li a5,1
bne a4,a5,.L4 # not 1: nothing to count
lw a5,-24(s0)
addi a5,a5,1
sw a5,-24(s0) # white++
.L4:
lw a5,-28(s0)
addi a5,a5,1
sw a5,-28(s0) # i++
# counting loop condition
.L2:
lw a4,-28(s0)
lw a5,-40(s0)
blt a4,a5,.L5 # repeat while i < numsSize
lw a4,-20(s0)
lw a5,-24(s0)
add a5,a4,a5
sw a5,-32(s0) # red_white = red + white
sw zero,-28(s0) # i = 0
j .L6 # enter the fill loop at its condition
# fill loop body
.L10:
lw a4,-28(s0)
lw a5,-20(s0)
bge a4,a5,.L7 # i >= red: not in the 0 region
lw a5,-28(s0)
slli a5,a5,2
lw a4,-36(s0)
add a5,a4,a5
sw zero,0(a5) # nums[i] = 0
j .L8
.L7:
lw a4,-28(s0)
lw a5,-32(s0)
bge a4,a5,.L9 # i >= red_white: not in the 1 region
lw a5,-28(s0)
slli a5,a5,2
lw a4,-36(s0)
add a5,a4,a5
li a4,1
sw a4,0(a5) # nums[i] = 1
j .L8
.L9:
lw a5,-28(s0)
slli a5,a5,2
lw a4,-36(s0)
add a5,a4,a5
li a4,2
sw a4,0(a5) # nums[i] = 2
# print the element that was just written
.L8:
lw a5,-28(s0)
slli a5,a5,2
lw a4,-36(s0)
add a5,a4,a5
lw a5,0(a5) # a5 = nums[i]
mv a1,a5 # printf arg1 = nums[i]
lui a5,%hi(.LC2)
addi a0,a5,%lo(.LC2) # printf arg0 = "%d\n"
call printf
lw a5,-28(s0)
addi a5,a5,1
sw a5,-28(s0) # i++
# fill loop condition
.L6:
lw a4,-28(s0)
lw a5,-40(s0)
blt a4,a5,.L10 # repeat while i < numsSize
#nop
#nop
lw ra,44(sp) # restore return address
lw s0,40(sp) # restore s0
addi sp,sp,48 # release the frame
jr ra
.size sortColors, .-sortColors
# Initial contents of the second and third test arrays; main copies them
# onto its stack before calling sortColors.
.section .rodata
.align 2
# second array: {2,0,0,1}
.LC0:
.word 2
.word 0
.word 0
.word 1
.align 2
# third array: {2,0,2,1,0,1,0,1}
.LC1:
.word 2
.word 0
.word 2
.word 1
.word 0
.word 1
.word 0
.word 1
.text
.align 2
.globl main
.type main, @function
# int main(void) -- as emitted by the compiler.
# Builds three arrays on the stack and calls sortColors on each. Returns 0.
# Frame (96 bytes, s0 = value of sp on entry):
#   -20(s0), -24(s0), -28(s0)  element counts 3, 4 and 8
#   -40..-32(s0)  first array (3 words)
#   -56..-44(s0)  second array (4 words)
#   -88..-60(s0)  third array (8 words)
main:
addi sp,sp,-96 # allocate the 96-byte frame
sw ra,92(sp) # save return address
sw s0,88(sp) # save the caller's s0
addi s0,sp,96 # s0 = frame base
# --- first array {2,0,1}, count 3 ---
li a5,2
sw a5,-40(s0) # [0] = 2
sw zero,-36(s0) # [1] = 0
li a5,1
sw a5,-32(s0) # [2] = 1
li a5,3
sw a5,-20(s0) # count = 3
addi a5,s0,-40 # a5 = address of the first array
lw a1,-20(s0) # arg1 = count
mv a0,a5 # arg0 = array
call sortColors
# --- second array: copy the four words of .LC0, count 4 ---
lui a5,%hi(.LC0)
addi a5,a5,%lo(.LC0) # a5 = &.LC0
lw a2,0(a5)
lw a3,4(a5)
lw a4,8(a5)
lw a5,12(a5) # last load overwrites the base pointer
sw a2,-56(s0)
sw a3,-52(s0)
sw a4,-48(s0)
sw a5,-44(s0)
li a5,4
sw a5,-24(s0) # count = 4
addi a5,s0,-56 # a5 = address of the second array
lw a1,-24(s0) # arg1 = count
mv a0,a5 # arg0 = array
call sortColors
# --- third array: copy the eight words of .LC1, count 8 ---
lui a5,%hi(.LC1)
addi a5,a5,%lo(.LC1) # a5 = &.LC1
lw a7,0(a5)
lw a6,4(a5)
lw a0,8(a5)
lw a1,12(a5)
lw a2,16(a5)
lw a3,20(a5)
lw a4,24(a5)
lw a5,28(a5) # last load overwrites the base pointer
sw a7,-88(s0)
sw a6,-84(s0)
sw a0,-80(s0)
sw a1,-76(s0)
sw a2,-72(s0)
sw a3,-68(s0)
sw a4,-64(s0)
sw a5,-60(s0)
li a5,8
sw a5,-28(s0) # count = 8
addi a5,s0,-88 # a5 = address of the third array
lw a1,-28(s0) # arg1 = count
mv a0,a5 # arg0 = array
call sortColors
li a5,0
mv a0,a5 # return 0
lw ra,92(sp) # restore return address
lw s0,88(sp) # restore s0
addi sp,sp,96 # release the frame
jr ra
.size main, .-main
.ident "GCC: (xPack GNU RISC-V Embedded GCC x86_64) 12.2.0"
```
### Handwritten assembly
```s=
.file "leetcode.c"
.option nopic
.attribute arch, "rv32i2p0_m2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
# printf format used by sortColors to print one array element per line
.section .rodata
.align 2
.LC2:
.string "%d\n"
.text
.align 2
.globl sortColors
.type sortColors, @function
# void sortColors(int *nums, int numsSize) -- hand-tuned from compiler output.
# In: a0 = nums, a1 = numsSize. No return value.
# Pass 1 counts the 0s (red) and 1s (white); pass 2 rewrites the array as
# 0s, then 1s, then 2s, and prints each element with printf after writing it.
#
# Difference from the compiler output: `white` is kept in register a6
# instead of the stack slot -24(s0), which removes a load and a store per
# counted 1. a6 is only live during pass 1 and the red_white computation,
# i.e. before the first printf call, so it needs no saving.
#
# `red` still lives at -20(s0) and is read-modify-written in pass 1, so it
# MUST be zeroed on entry. Without that store it starts from whatever the
# stack slot last held (e.g. the `red` left by a previous call).
#
# Frame (48 bytes, s0 = value of sp on entry):
#   -20(s0) red   -28(s0) i   -32(s0) red_white
#   -36(s0) nums  -40(s0) numsSize
sortColors:
    addi sp,sp,-48
    sw   ra,44(sp)           # save return address (printf is called below)
    sw   s0,40(sp)           # save the caller's s0
    addi s0,sp,48            # s0 = frame base
    sw   a0,-36(s0)          # spill nums
    sw   a1,-40(s0)          # spill numsSize
    sw   zero,-20(s0)        # red = 0   (required, see header)
    addi a6,x0,0             # white = 0 (register-resident)
    sw   zero,-28(s0)        # i = 0
    j    .L2                 # enter the counting loop at its condition
# counting loop body
.L5:
    lw   a5,-28(s0)
    slli a5,a5,2             # a5 = i * 4
    lw   a4,-36(s0)
    add  a5,a4,a5            # a5 = &nums[i]
    lw   a5,0(a5)            # a5 = nums[i]
    bne  a5,zero,.L3         # not 0: go test for 1
    lw   a5,-20(s0)
    addi a5,a5,1
    sw   a5,-20(s0)          # red++
    j    .L4
.L3:
    lw   a5,-28(s0)
    slli a5,a5,2
    lw   a4,-36(s0)
    add  a5,a4,a5
    lw   a4,0(a5)            # a4 = nums[i] (reloaded)
    li   a5,1
    bne  a4,a5,.L4           # not 1: nothing to count
    addi a6,a6,1             # white++
.L4:
    lw   a5,-28(s0)
    addi a5,a5,1
    sw   a5,-28(s0)          # i++
# counting loop condition
.L2:
    lw   a4,-28(s0)
    lw   a5,-40(s0)
    blt  a4,a5,.L5           # repeat while i < numsSize
    lw   a4,-20(s0)
    add  a5,a4,a6
    sw   a5,-32(s0)          # red_white = red + white
    sw   zero,-28(s0)        # i = 0
    j    .L6                 # enter the fill loop at its condition
# fill loop body
.L10:
    lw   a4,-28(s0)
    lw   a5,-20(s0)
    bge  a4,a5,.L7           # i >= red: not in the 0 region
    lw   a5,-28(s0)
    slli a5,a5,2
    lw   a4,-36(s0)
    add  a5,a4,a5
    sw   zero,0(a5)          # nums[i] = 0
    j    .L8
.L7:
    lw   a4,-28(s0)
    lw   a5,-32(s0)
    bge  a4,a5,.L9           # i >= red_white: not in the 1 region
    lw   a5,-28(s0)
    slli a5,a5,2
    lw   a4,-36(s0)
    add  a5,a4,a5
    li   a4,1
    sw   a4,0(a5)            # nums[i] = 1
    j    .L8
.L9:
    lw   a5,-28(s0)
    slli a5,a5,2
    lw   a4,-36(s0)
    add  a5,a4,a5
    li   a4,2
    sw   a4,0(a5)            # nums[i] = 2
# print the element that was just written
.L8:
    lw   a5,-28(s0)
    slli a5,a5,2
    lw   a4,-36(s0)
    add  a5,a4,a5
    lw   a5,0(a5)            # a5 = nums[i]
    mv   a1,a5               # printf arg1 = nums[i]
    lui  a5,%hi(.LC2)
    addi a0,a5,%lo(.LC2)     # printf arg0 = "%d\n"
    call printf
    lw   a5,-28(s0)
    addi a5,a5,1
    sw   a5,-28(s0)          # i++
# fill loop condition
.L6:
    lw   a4,-28(s0)
    lw   a5,-40(s0)
    blt  a4,a5,.L10          # repeat while i < numsSize
    lw   ra,44(sp)           # restore return address
    lw   s0,40(sp)           # restore s0
    addi sp,sp,48            # release the frame
    jr   ra
.size sortColors, .-sortColors
# Initial contents of the second and third test arrays; main copies them
# onto its stack before calling sortColors.
.section .rodata
.align 2
# second array: {2,0,0,1}
.LC0:
.word 2
.word 0
.word 0
.word 1
.align 2
# third array: {2,0,2,1,0,1,0,1}
.LC1:
.word 2
.word 0
.word 2
.word 1
.word 0
.word 1
.word 0
.word 1
.text
.align 2
.globl main
.type main, @function
# int main(void)
# Builds three arrays on the stack and calls sortColors on each. Returns 0.
# Frame (96 bytes, s0 = value of sp on entry):
#   -20(s0), -24(s0), -28(s0)  element counts 3, 4 and 8
#   -40..-32(s0)  first array (3 words)
#   -56..-44(s0)  second array (4 words)
#   -88..-60(s0)  third array (8 words)
main:
addi sp,sp,-96 # allocate the 96-byte frame
sw ra,92(sp) # save return address
sw s0,88(sp) # save the caller's s0
addi s0,sp,96 # s0 = frame base
# --- first array {2,0,1}, count 3 ---
li a5,2
sw a5,-40(s0) # [0] = 2
sw zero,-36(s0) # [1] = 0
li a5,1
sw a5,-32(s0) # [2] = 1
li a5,3
sw a5,-20(s0) # count = 3
addi a5,s0,-40 # a5 = address of the first array
lw a1,-20(s0) # arg1 = count
mv a0,a5 # arg0 = array
call sortColors
# --- second array: copy the four words of .LC0, count 4 ---
lui a5,%hi(.LC0)
addi a5,a5,%lo(.LC0) # a5 = &.LC0
lw a2,0(a5)
lw a3,4(a5)
lw a4,8(a5)
lw a5,12(a5) # last load overwrites the base pointer
sw a2,-56(s0)
sw a3,-52(s0)
sw a4,-48(s0)
sw a5,-44(s0)
li a5,4
sw a5,-24(s0) # count = 4
addi a5,s0,-56 # a5 = address of the second array
lw a1,-24(s0) # arg1 = count
mv a0,a5 # arg0 = array
call sortColors
# --- third array: copy the eight words of .LC1, count 8 ---
lui a5,%hi(.LC1)
addi a5,a5,%lo(.LC1) # a5 = &.LC1
lw a7,0(a5)
lw a6,4(a5)
lw a0,8(a5)
lw a1,12(a5)
lw a2,16(a5)
lw a3,20(a5)
lw a4,24(a5)
lw a5,28(a5) # last load overwrites the base pointer
sw a7,-88(s0)
sw a6,-84(s0)
sw a0,-80(s0)
sw a1,-76(s0)
sw a2,-72(s0)
sw a3,-68(s0)
sw a4,-64(s0)
sw a5,-60(s0)
li a5,8
sw a5,-28(s0) # count = 8
addi a5,s0,-88 # a5 = address of the third array
lw a1,-28(s0) # arg1 = count
mv a0,a5 # arg0 = array
call sortColors
li a5,0
mv a0,a5 # return 0
lw ra,92(sp) # restore return address
lw s0,88(sp) # restore s0
addi sp,sp,96 # release the frame
jr ra
.size main, .-main
.ident "GCC: (xPack GNU RISC-V Embedded GCC x86_64) 12.2.0"
```
### RTL sim

### ISS sim

### Comparison (using code from hw2)
|RTL sim cycle|opt|
|-|-|
|18790|no (only rely on compiler)|
|6201|handwrite assembly|
## Appendix
