owned this note
owned this note
Published
Linked with GitHub
# Assignment2: RISC-V Toolchain
contributed By < [SUE3K](https://github.com/SUE3K/computer_architecture_hw1/tree/main) >
## linkedlist
"linked list" is a common data structure


As shown in the two diagrams above, the concept is to use nodes to record, represent, and store data. Each node has three components: Data, Pointer, and Address. Additionally, each node's pointer points to the address of the next node, continuing until it points to Null, signifying the end of this simple linked list. The time complexity is **O(N)**
## Count leading zero
To calculate the number of consecutive zeros, counting from the Most Significant Bit (MSB) towards the right, until the first encountered '1' in a binary number
Ex: **0000000000000010 =14**
## Motivation
For this assignment, I choose the program Sum of Leading Zeros in Linked List by CLZ 劉智恩.
This topic was chosen for its academic challenge, optimization potential, real-world applications, and the opportunity to gain a deeper understanding of low-level programming and computer architecture.
## Implement
### Original C code
```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> // using malloc functions
// def linkedlist structure
typedef struct Node {
uint32_t data; // 32 bits unsigned integers
struct Node* next;
} Node;
// calculate 32bits unsigned int count of leading zeros
uint16_t count_leading_zeros(uint32_t x)
{
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
/* count ones (population count) */
x -= ((x >> 1) & 0x55555555);
x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
x = ((x >> 4) + x) & 0x0f0f0f0f;
x += (x >> 8);
x += (x >> 16);
return (32 - (x & 0x1f)); // 32 bits unsigned int leading zeors
}
// calculate all linkedlists node clz and sum
uint64_t sum_of_leading_zeros(Node* head)
{
uint64_t sum = 0;
while (head != NULL) {
uint16_t leadingZeros = count_leading_zeros(head->data);
sum += leadingZeros;
head = head->next;
}
return sum;
}
int main()
{
// create a simple linked list
Node* head = NULL;
Node* node1 = malloc(sizeof(Node));
node1->data = 23;
node1->next = NULL;
head = node1;
Node* node2 = malloc(sizeof(Node));
node2->data = 15;
node2->next = NULL;
node1->next = node2;
Node* node3 = malloc(sizeof(Node));
node3->data = 1
;
node3->next = NULL;
node2->next = node3;
// calculate sum of linkedlist node leading zeors for 32bits unsigned integers
uint32_t totalLeadingZeros = sum_of_leading_zeros(head);
printf("Sum of Leading Zeros: %llu\n", totalLeadingZeros);
// release linked list node memory
while (head != NULL) {
Node* temp = head;
head = head->next;
free(temp);
}
return 0;
}
```
### Modified C code
```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> // using malloc functions
// count cycle
typedef uint64_t ticks;
static inline ticks getticks(void)
{ uint64_t result;
uint32_t l, h, h2;
asm volatile(
"rdcycleh %0\n"
"rdcycle %1\n"
"rdcycleh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
"and %1, %1, %0\n"
: "=r"(h), "=r"(l), "=r"(h2));
result = (((uint64_t) h) << 32) | ((uint64_t) l);
return result;
}
// def linkedlist structure
typedef struct Node {
uint32_t data; // 32 bits unsigned integers
struct Node* next;
} Node;
// calculate 32bits unsigned int count of leading zeros
uint16_t count_leading_zeros(uint32_t x)
{
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
/* count ones (population count) */
x -= ((x >> 1) & 0x55555555);
x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
x = ((x >> 4) + x) & 0x0f0f0f0f;
x += (x >> 8);
x += (x >> 16);
return (32 - (x & 0x1f)); // 32 bits unsigned int leading zeors
}
// calculate all linkedlists node clz and sum
uint64_t sum_of_leading_zeros(Node* head)
{
uint64_t sum = 0;
while (head != NULL) {
uint16_t leadingZeros = count_leading_zeros(head->data);
sum += leadingZeros;
head = head->next;
}
return sum;
}
int main()
{
ticks t0 = getticks();
// create a simple linked list
Node* head = NULL;
Node* node1 = malloc(sizeof(Node));
node1->data = 23;
node1->next = NULL;
head = node1;
Node* node2 = malloc(sizeof(Node));
node2->data = 15;
node2->next = NULL;
node1->next = node2;
Node* node3 = malloc(sizeof(Node));
node3->data = 1
;
node3->next = NULL;
node2->next = node3;
// calculate sum of linkedlist node leading zeors for 32bits unsigned integers
uint64_t totalLeadingZeros = sum_of_leading_zeros(head);
printf("Sum of Leading Zeros: %llu\n", totalLeadingZeros);
// release linked list node memory
while (head != NULL) {
Node* temp = head;
head = head->next;
free(temp);
}
ticks t1 = getticks();
printf("elapsed cycle: %" PRIu64 "\n", t1 - t0); //cycle number
return 0;
}
```
### Makefile
we need to adjust original makefile from asm-hello
```shell
.PHONY: clean
include /home/ianli/rv32emu/mk/toolchain.mk
CFLAGS = -march=rv32i -mabi=ilp32 -O3
ASFLAGS = -march=rv32i -mabi=ilp32
LDFLAGS = --oformat=elf32-littleriscv
%.S: %.c
$(CROSS_COMPILE)gcc $(CFLAGS) -o $@ -S $<
%.o: %.S
$(CROSS_COMPILE)as $(ASFLAGS) -o $@ $<
all: hw1_liu.elf
hw1_liu.S: hw1_liu.c
$(CROSS_COMPILE)gcc $(CFLAGS) -o $@ -S $<
hw1_liu.elf: hw1_liu.o
$(CROSS_COMPILE)gcc -o $@ $<
clean:
$(RM) hw1_liu.elf hw1_liu.o hw1_liu.S
```
so we can "make", generate .s .o and .elf
<s>

</s>
:::warning
Don't put the screenshots which contain plain text only.
:notes: jserv
:::
### Output cycle count
we need to add "ticks" into our code for counting cycle.
### after we modified
```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> // using malloc functions
// count cycle
typedef uint64_t ticks;
static inline ticks getticks(void)
{
uint64_t result;
uint32_t l, h, h2;
asm volatile(
"rdcycleh %0\n"
"rdcycle %1\n"
"rdcycleh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
"and %1, %1, %0\n"
: "=r"(h), "=r"(l), "=r"(h2));
result = (((uint64_t) h) << 32) | ((uint64_t) l);
return result;
}
// def linkedlist structure
typedef struct Node {
uint32_t data; // 32 bits unsigned integers
struct Node* next;
} Node;
// calculate 32bits unsigned int count of leading zeros
uint16_t count_leading_zeros(uint32_t x)
{
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
/* count ones (population count) */
x -= ((x >> 1) & 0x55555555);
x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
x = ((x >> 4) + x) & 0x0f0f0f0f;
x += (x >> 8);
x += (x >> 16);
return (32 - (x & 0x1f)); // 32 bits unsigned int leading zeors
}
// calculate all linkedlists node clz and sum
uint64_t sum_of_leading_zeros(Node* head)
{
uint64_t sum = 0;
while (head != NULL) {
uint16_t leadingZeros = count_leading_zeros(head->data);
sum += leadingZeros;
head = head->next;
}
return sum;
}
int main()
{
ticks t0 = getticks();
// create a simple linked list
Node* head = NULL;
Node* node1 = malloc(sizeof(Node));
node1->data = 23;
node1->next = NULL;
head = node1;
Node* node2 = malloc(sizeof(Node));
node2->data = 15;
node2->next = NULL;
node1->next = node2;
Node* node3 = malloc(sizeof(Node));
node3->data = 1;
node3->next = NULL;
node2->next = node3;
// calculate sum of linkedlist node leading zeors for 32bits unsigned integers
uint64_t totalLeadingZeros = sum_of_leading_zeros(head);
printf("Sum of Leading Zeros: %ld\n", totalLeadingZeros);
// release linked list node memory
while (head != NULL) {
Node* temp = head;
head = head->next;
free(temp);
}
ticks t1 = getticks();
printf("elapsed cycle: %" PRIu64 "\n", t1 - t0); //cycle number
return 0;
}
```
### Obtain the O0-3
O0:

:::danger
Avoid using screenshots that solely contain plain text. Here are the reasons why:
1. Text-based content is more efficiently searchable than having to browse through images iteratively.
2. The rendering engine of HackMD can consistently generate well-structured layouts with annotated text instead of relying on arbitrary pictures.
3. It provides a more accessible and user-friendly experience for individuals with visual impairments.
:notes: jserv
:::
show the ELF Header
```
riscv-none-elf-readelf -h asm-hw2/hw1_liu.elf
```
```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x100c2
Start of program headers: 52 (bytes into file)
Start of section headers: 69404 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
show the text size
```
riscv-none-elf-size asm-hw2/hw1_liu.elf
```
size
```
text data bss dec hex filename
51958 1876 1528 55362 d842 asm-hw2/hw1_liu.elf
```
O1:

```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x100c2
Start of program headers: 52 (bytes into file)
Start of section headers: 69404 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
size
```
text data bss dec hex filename
51470 1876 1528 54874 d65a asm-hw2/hw1_liu.elf
```
O2:

```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x101a2
Start of program headers: 52 (bytes into file)
Start of section headers: 69420 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
size
```
text data bss dec hex filename
51566 1876 1528 54970 d6ba asm-hw2/hw1_liu.elf
```
O3:

```
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: RISC-V
Version: 0x1
Entry point address: 0x10276
Start of program headers: 52 (bytes into file)
Start of section headers: 69396 (bytes into file)
Flags: 0x1, RVC, soft-float ABI
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 3
Size of section headers: 40 (bytes)
Number of section headers: 15
Section header string table index: 14
```
size
```
text data bss dec hex filename
51744 1876 1528 55148 d76c asm-hw2/hw1_liu.elf
```
### handwrite assembly
I try to reducing some text size
original
```
srli t0,s0,1
or s0,s0,t0
srli t0,s0,2
or s0,s0,t0
srli t0,s0,4
or s0,s0,t0
srli t0,s0,8
or s0,s0,t0
srli t0,s0,16
or s0,s0,t0
```
after optimize
```
li s3, 1
li s4, 0
li s5, 5
counter:
beq s4, x0, op_main
sll s3, s3, s4
op_main:
addi s4, s4, 1
srl t0,s0, s3
or s0,s0,t0
bne s4, s5, counter
```
and try to add some intruction into the makefile
```
GCCFLAGS := -fdata-sections -ffunction-sections
LDFLAGS := -Wl,--gc-sections
```
## result
| version | cycle count | text |
|---------|-------------|---------|
|-O0|3123|51958|
|-O1|2868|51470|
|-O2|2838|51566|
|-O3|2831|51744|
:::warning
Show me the handwritten RISC-V assembly code.
:notes: jserv
:::