# Assignment1: RISC-V Assembly and Instruction Pipeline
contributed by < [`DotandLog`](https://github.com/DotandLog) >
###### tags: `RISC-V`, `jserv`
## Reverse Linked List
## Implementation
You can find the source code [here](https://github.com/kaeteyaruyo/Computer-Architecture/tree/master/hw1)
### C code
#### struct ListNode* reverseList(struct ListNode* head)
```c=
struct ListNode* reverseList(struct ListNode* head) {
struct ListNode* curr = head;
struct ListNode* next = NULL;
struct ListNode* prev = NULL;
while (curr != NULL) {
next = curr->next;
curr->next = prev;
prev = curr;
curr = next;
}
return prev;
}
```
#### Full Program
```c=
#include <stdio.h>
#include <stdlib.h>
typedef struct ListNode {
int val;
struct ListNode* next;
} * Node;
struct ListNode* reverseList(struct ListNode* head) {
struct ListNode* curr = head;
struct ListNode* next = NULL;
struct ListNode* prev = NULL;
while (curr != NULL) {
next = curr->next;
curr->next = prev;
prev = curr;
curr = next;
}
return prev;
}
Node initListNode(int input[], int size) {
Node head = malloc(sizeof(Node));
Node curr = head;
head->val = input[0];
for (int i = 1; i < size; i++) {
Node newNode = malloc(sizeof(Node));
newNode->val = input[i];
curr->next = newNode;
curr = newNode;
}
curr->next = NULL;
return head;
}
void printNode(Node head, char InorOut) {
Node curr = head;
(InorOut == 'I') ? printf("Input:[") : printf("Output:[");
while (curr) {
printf("%d,", curr->val);
curr = curr->next;
}
printf("]\n");
}
void freeNode(Node head) {
while (head) {
Node temp = head;
head = head->next;
free(temp);
}
}
void runTest(Node head) {
printNode(head, 'I');
head = reverseList(head);
printNode(head, 'O');
freeNode(head);
}
int main() {
Node head1, head2, head3;
int test1[] = {1, 2, 3, 4, 5};
int test2[] = {1, 2};
int test3[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
head1 = initListNode(test1, 5);
head2 = initListNode(test2, 2);
head3 = initListNode(test3, 10);
runTest(head1);
runTest(head2);
runTest(head3);
}
```
### Assembly code
In this example,
```clike=
.data
test1:.word 1, 2, 3, 4, 5
test2:.word 1, 2
test3:.word 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
start_input_line:.string "Input:["
start_output_line:.string "Output:["
endline:.string "]\n"
.text
main:
la a0, test1 # a0 = test1[0]
addi a1, a1, 5 # a1 = test1_sizes
jal ra, initListNode
jal ra, runTest
la a0, test2 # a0 = test2[0]
addi a1, a1, 2 # a1 = test2_sizes
jal ra, initListNode
jal ra, runTest
la a0, test3 # a0 = test3[0]
addi a1, a1, 10 # a1 = test3_sizes
jal ra, initListNode
jal ra, runTest
j exit
initListNode:
addi sp, sp, -8
sw ra, 0(sp)
addi t0, a1, 0
mv t1, a0
lw t2, 0(t1) # load test[0]'s value
li s0, 0x20000000 # s0 store the address of Node
sw s0, 4(s0) # load the list head to the stack
jal ra node_alloc # alloc memory for node
sw t3, s0 # Node curr = head
sw t2, 0(s0) # head->val = test[0]
addi s0, s0, 8 #offset to next arry
li s1, 1 # set i = 1
loop:
jal ra node_alloc # Node newNode = malloc
addi t1, t1, 4 # find the next value in test by 4(int)
lw t2, 0(t1) # load test[i+1]
sw t2, 0(s0) # newNode->val = test[i]
sw s0, 4(t3) # curr->next = newNode
addi s0, s0, 8
addi t3, t3, 8
addi s1, 1 # i++
bne s1,t0 loop # for statement
addi s0, s0, 4
sw x0, 4(t3)
lw ra, 0(sp)
lw a0, 4(sp)
addi sp, sp, 8
jr ra
runTest:
la a0, start_input_line
li a7, 4
ecall
jal ra printNode
jal ra reverseNode
la a0, end_input_line
li a7, 4
ecall
jal ra printNode
jal ra freeNode
jr ra
printNode:
lw a0, 0(t1)
li a7, 1
ecall
li a0, 9
li a7, 11
ecall
lw t1,4(t1)
bne t1, x0 printNode
li a0, 10
li a7, 11
ecall
ja ra
reverseNode:
lw a0, 0(a0)
beqz a0, done
addi t0, t0, 0
loop2:
lw t1, 0(a0)
sw t0, 0(a0)
addi t0, a0, 0
addi a0, t1, 0
bnez t1,loop2
done:
jr ra
freeNode:
node_alloc:
li a7, 214
ecall
jr ra
exit:
li a7, 10
ecall
```
### Reviced ASM
```
.data
test1:.word 1, 2, 3, 4, 5
len1: .word 5
test2:.word 1, 2
len2: .word 2
test3:.word 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
len3: .word 10
start_input_line:.string "Input:["
start_output_line:.string "Output:["
endline:.string "]\n"
.text
main:
la a0, test1 # a0 = test1[0]
lw a1, len1 # a1 = len(test1)
jal ra, initListNode
jal ra, runTest
la a0, test2 # a0 = test2[0]
lw a1, len2
jal ra, initListNode
jal ra, runTest
la a0, test3 # a0 = test3[0]
lw a1, len3
jal ra, initListNode
jal ra, runTest
j exit
initListNode:
addi sp, sp, -8
sw ra, 0(sp)
mv t0, a1 # t0 is the size of linked list
mv t1, a0 # t1 is the address of array
lw t2, 0(t1) # load test[0]'s value
li s0, 0x20000000 # s0 save the address of Node
sw s0, 4(sp) # load the list head to the stack #
jal ra, node_alloc # alloc memory for node
mv t3, s0 # Node curr = head #
sw t2, 0(s0) # head->val = test[0]
addi s0, s0, 8 # offset to next array (8 is the ListNode size)
li s1, 1 # set i = 1
loop:
jal ra, node_alloc # Node newNode = malloc
addi t1, t1, 4 # find the next value in test by 4(int)
lw t2, 0(t1) # load test[i+1]
sw t2, 0(s0) # newNode->val = test[i]
sw s0, 4(t3) # curr->next = newNode
addi s0, s0, 8 # To get the next
addi t3, t3, 8
addi s1,s1, 1 # i++
bne s1,t0, loop # for statement
addi s0, s0, 4
sw x0, 4(t3)
lw ra, 0(sp)
lw a0, 4(sp)
addi sp, sp, 8
jr ra
runTest:
addi sp, sp, -8
sw ra,0(sp)
sw a0,4(sp)
la a0, start_input_line
li a7, 4
ecall
lw t1,4(sp)
jal ra, printNode
lw a0,4(sp)
jal ra, reverseNode
sw a0,4(sp) # save the new head to stack
la a0, start_output_line
li a7, 4
ecall
lw t1,4(sp)
jal ra, printNode
jal ra, freeNode
lw ra,0(sp)
addi sp, sp, 8
jr ra
printNode:
lw a0, 0(t1)
li a7, 1
ecall
li a0, 9
li a7, 11
ecall
lw t1,4(t1)
bne t1, x0, printNode
li a0, 10
li a7, 11
ecall
jr ra
reverseNode:
mv t1,a0 # t1 is curr
beqz a0, done
addi t4,x0,0
loop2:
lw t2,4(t1) # t2 is next
sw t4,4(t1)
mv t4,t1 # t4 is prev
mv t1,t2
bnez t1,loop2
done:
mv a0,t4 # return prev
jr ra
freeNode:
node_alloc:
li a7, 214
ecall
jr ra
exit:
li a7, 10
ecall
```
## Resulted

## Analysis
We test our code using [Ripes](https://github.com/mortbopet/Ripes) simulator.
### 5-stage pipelined processor
Now we can choose a processor to run this code. Ripes provide four kinds of processor for us to choose, including **single cycle processor**, **5-stage processor**, **5-stage with hazard detection** and **5-stage with forward and hazard detection**. Here we choose the **5 stage processor**. Its block diagram look like this:

The "5-stage" means this processor using five-stage pipeline to parallelize instructions. The stages are:
1. Instruction fetch (IF)
2. Instruction decode and register fetch (ID)
3. Execute (EX)
4. Memory access (MEM)
5. Register write back (WB)
- Instruction fetch (IF)

- pc is stored the next addr to get the next instr.
- instr. memory is for the next branch instr.
- compressed decoder is for general instr fetch.
- Instruction decode and register fetch (ID) & Execute (EX)

- generate control signals for the opcode bits.
- read source operands from the register file (RF).
- rpdate pipeline registers.
- Memory access (MEM) & Register write back (WB)

- load/store address: ALU outcome
- vontrol signals determine read or write access
- update register file
to be updated...
## Reference
* [Dot product - Wikipedia](https://en.wikipedia.org/wiki/Dot_product)
* [RISC-V Instruction Set Manual](https://riscv.org//wp-content/uploads/2017/05/riscv-spec-v2.2.pdf)
* [RISC-V Green Card](https://www.cl.cam.ac.uk/teaching/1617/ECAD+Arch/files/docs/RISCVGreenCardv8-20151013.pdf)
* [Environmental Calls](https://github.com/mortbopet/Ripes/wiki/Environment-calls)