# Homework2: RISC-V Toolchain ###### tags: `2022 computer architecture` ## Rewrite [Contains Duplicate](https://leetcode.com/problems/contains-duplicate/) I choose the **Contains Duplicate** from [莊集](https://hackmd.io/@y8jRQNyoRe6WG-qekloIlA/Sk0PXEDzj) **Motivation**: In the original C code, author used heap sort to sort the array. I want to improve his works using emu32i. I want to compare between hand writing code and generated code under differrent setting. ```c= #include<stdio.h> #include<stdbool.h> void swap(int *x, int *y){ int temp = *x; *x = *y; *y = temp; } void heapify(int *nums, int i, int numsSize){ int currValue = nums[i]; int j = i * 2 + 1; // left child int parent; while (j < numsSize){ if (j < (numsSize - 1)){ if (nums[j] < nums[j + 1]){ j = j + 1; } } if (currValue >= nums[j]) break; else{ if (j % 2 == 0) parent = j / 2 - 1; else parent = j / 2; nums[parent] = nums[j]; j = j * 2 + 1; } } if (j % 2 == 0) parent = j / 2 - 1; else parent = j / 2; nums[parent] = currValue; } void heapSort(int *nums, int numsSize){ for (int i = numsSize / 2 - 1; i >= 0; i--){ heapify(nums, i, numsSize); } for (int i = numsSize - 1; i >= 0; i--){ swap(&nums[0], &nums[i]); heapify(nums, 0, i); } } bool containsDuplicate(int* nums, int numsSize){ heapSort(nums, numsSize); for (int i = 0; i < numsSize - 1; i++){ if (nums[i] == nums[i + 1]) return true; } return false; } int main(){ int question1[] = {1, 4, 5, 6, 2, 3}; bool answer1 = false; printf("Question1 "); if (answer1 == containsDuplicate(question1, (sizeof(question1)/sizeof(question1[0])))) printf("Accepted\n"); else printf("Failed\n"); int question2[] = {3, 2, 1, 4, 2, 1}; bool answer2 = true; printf("Question2 "); if (answer2 == containsDuplicate(question2, (sizeof(question2)/sizeof(question2[0])))) printf("Accepted\n"); else printf("Failed\n"); int question3[] = {-1, -2, 3, 4, -2, -1}; bool answer3 = true; printf("Question3 "); if (answer3 == containsDuplicate(question3, (sizeof(question3)/sizeof(question3[0])))) printf("Accepted\n"); else printf("Failed\n"); return 0; } ``` Result ![](https://i.imgur.com/IKFuZHK.png) ## Compare Assembly Code ### Original Code (From [莊集](https://hackmd.io/@y8jRQNyoRe6WG-qekloIlA/Sk0PXEDzj)) ```clike= .data q1: .word 1, 4, 5, 6, 2, 3 q1Length: .word 6 ans1: .word 0 q2: .word 3, 2, 1, 4, 2, 1, 5 q2Length: .word 7 ans2: .word 1 q3: .word -1, -2, 3, 4, -2 q3Length: .word 5 ans3: .word 1 originArr: .string "originArr: " sortedArr: .string "sortedArr: " newLine: .string "\n" Question1: .string "Question1\n" Question2: .string "Question2\n" Question3: .string "Question3\n" True: .string "\nTrue" False: .string "\nFalse" Accepted: .string "\nAnswer Accepted" Failed: .string "\nAnswer Failed" .text main: ## question 1 la a0, Question1 addi a7, zero, 4 ecall la a0, q1 # get head addr of q1 lw a1, q1Length # get q1 size mv s0, a0 # keep q1 head addr in s0 mv s1, a1 jal containsDuplicate # pass array head addr and size to containsDuplicate jal autoTest # test the result with the answer j end # end the program containsDuplicate: # bool containsDuplicate(*nums, numsSize), nums:a0, numsSize:a1 addi sp, sp, -28 # store ra, s0, s1 sw ra, 0(sp) # store ra sw s0, 4(sp) # store s0 sw s1, 8(sp) # store s1 sw s2, 12(sp) # store s2 sw s3, 16(sp) # store s3 sw s4, 20(sp) # store s4 sw s5, 24(sp) # store s5 mv s0, a0 # keep array head addr in s0 mv s1, a1 # keep array size in s1 ## test la a0, originArr addi a7, zero, 4 ecall mv a0, s0 mv a1, s1 ecall jal printArray #j end ## test jal beforeHeapSort # call heapSort to sort the array from little to large ## do swap nums[0] and nums[i] and heapify while (i' = numsSize - 1; i >= 0; i--) mv a0, s0 # a0 = nums mv a1, s1 # a1 = numsSize addi t3, s1, -1 # i'(t3) = numsSize - 1 mv a2, t3 # a2 = i' jal swapAndHeapify # do swap and heapify ## test la a0, newLine addi a7, zero, 4 ecall la a0, sortedArr addi a7, zero, 4 ecall mv a0, s0 mv a1, s1 jal printArray #j end ## test ## compare adjacent element to check if they are identical addi s2, zero, 0 # let s2 be index i of nums[i], and initialized with i = 0 addi s3, s3, 1 # let s3 be index j of nums[i + 1], and initialized with j = i + 1 containsDuplicateWhile: bge s3, a1, containsDuplicateFalse # while j < numsSize then do the comparaion with nums[i] and nums[i + 1] slli s4, s2, 2 # let s4 be offset of nums + 4 * i, which means nums[i] add s4, a0, s4 # s4 = nums + 4 * i lw s4, 0(s4) # s4 = nums[i] slli s5, s3, 2 # let s4 be offset of nums + 4 * j, which means nums[j] == nums[i + 1] add s5, a0, s5 # s4 = nums + 4 * i lw s5, 0(s5) # s4 = nums[j] = nums[i + 1] beq s4, s5, containsDuplicateTrue # True if there is identical elements sit together addi s2, s2, 1 # i++ addi s3, s3, 1 # j++ j containsDuplicateWhile containsDuplicateTrue: la a0, True addi a7, zero, 4 ecall addi a0, zero, 1 j containsDuplicateSkip containsDuplicateFalse: la a0, False addi a7, zero, 4 ecall addi a0, zero, 0 containsDuplicateSkip: #mv a0, s0 # restore nums head addr #mv a1, s1 # restore numsSize lw ra, 0(sp) # store ra lw s0, 4(sp) # store s0 lw s1, 8(sp) # store s1 lw s2, 12(sp) # store s2 lw s3, 16(sp) # store s3 lw s4, 20(sp) # store s4 lw s5, 24(sp) # store s5 addi sp, sp, 28 jr ra beforeHeapSort: addi sp, sp -16 # store ra, s0, s1, s2 sw ra, 0(sp) # store ra because I call heapify sw s0, 4(sp) # store s0 because I use a0 sw s1, 8(sp) # store s1 because I use a1 sw s2, 12(sp) # store s2 because I use s2 mv s0, a0 # keep array head addr in s0 mv s1, a1 # keep numsSize in a1 srli t6, a1, 1 # i(t6) = (numsSize / 2) - 1 addi t6, t6, -1 callHeapifyWhile: bge t6, zero, heapSortSkip # while i >= 0, then do heapify lw ra, 0(sp) # restore ra lw s0, 4(sp) # restore s0 lw s1, 8(sp) # restore s1 lw s2, 12(sp) # restore s2 addi sp, sp, 16 jr ra # return from heapSort heapSortSkip: mv a0, s0 # a0 = *nums mv a1, t6 # a1 = i mv a2, s1 # a2 = numsSize jal heapify # call heapify addi t6, t6, -1 # i-- j callHeapifyWhile # do next heapify while i still >= 0 swapAndHeapify: addi sp, sp, -24 # store ra, s0, s1, s2, s3, s4 sw ra, 0(sp) # store ra sw s0, 4(sp) # store s0 sw s1, 8(sp) # store s1 sw s2, 12(sp) # store s2 sw s3, 16(sp) # store s3 sw s4, 20(sp) # store s4 mv s0, a0 # keep nums head addr in s0 mv s1, a1 # keep numsSize in s1 mv s2, a2 # keep i' in s2 swayAndHeapifyWhile: bge s2, zero, swapAndHeapifySkip # while i >= 0 then do swap and heapify ## test #mv a0, s0 # test #mv a1, s1 # test #jal printArray # test #j end # test ## test lw ra, 0(sp) # restore ra lw s0, 4(sp) # restore s0 lw s1, 8(sp) # restore s1 lw s2, 12(sp) # restore s2 lw s3, 16(sp) # restore s3 lw s4, 20(sp) # restore s4 addi sp, sp, 24 jr ra swapAndHeapifySkip: addi s3, s0, 0 # let s3 be 0 and get nums[0] slli s4, s2, 2 # let s4 be i' and get nums[i'] add s4, s0, s4 # s4 = nums + 4 * i' ## call swap mv a0, s3 mv a1, s4 jal swap # swap(&nums[0], &nums[i]) ## call heapify mv a0, s0 mv a1, s2 jal beforeHeapSort ## test #mv a0, s0 # test #mv a1, s1 # test #jal printArray # test #j end # test ## test addi s2, s2, -1 # i(s2)-- j swayAndHeapifyWhile heapify: # void heapify(*nums, i, numsSize), nums:a0, i:a1, numsSize:a2 addi sp, sp -8 # store s10, s11 sw s10, 0(sp) # store s10 sw s11, 4(sp) # store s11 slli t5, a1, 2 # offset in array with i * 4 add t5, a0, t5 # add offset to base addr: (a0 + 4i) lw t5, 0(t5) # let t5 be currValue = *(a0 + 4i) slli t4, a1, 1 # let t4 be j = i * 2 + 1, means left child addi t4, t4, 1 heapifyWhile: bge t4, a2, assignCurrVal # while j < numsSize do adjust parent node addi t3, a2, -1 # if j < (numsSize - 1) && nums[j] < nums[j + 1] then j++ slt t3, t4, t3 # if j < numsSize - 1 then keep doing beq t3, zero, heapifySkip addi s11, t4, 1 # let s11 be dereference of *(nums + (j + 1)) slli s11, s11, 2 # s11 = 4 * (j + 1) add s11, a0, s11 # s11 = nums + 4 * (j + 1) lw s11, 0(s11) # s11 = *(nums + 4(j + 1)) = nums[j + 1] slli s10, t4, 2 # s10 = 4 * j add s10, a0, s10 # s10 = (num + 4j) lw s10, 0(s10) # s10 = *(num + 4j) = nums[j] slt t3, s10, s11 # if nums[j] < nums[j + 1] ? beq t3, zero, heapifySkip # if nums[j] < nums[j + 1] then j++ addi t4, t4, 1 # j++ heapifySkip: slli s10, t4, 2 # let s10 = 4 * j add s10, a0, s10 # s10 = (nums + 4j) lw s10, 0(s10) # s10 = *(nums + 4j) slt s10, t5, s10 # if (currValue >= nums[j]) then break beq s10, zero, assignCurrVal andi t3, t4, 1 # if (j % 2 == 0) then parent = j / 2 - 1 beq t3, zero, innerEvenAssign # else if (j % 2 == 1) then parent = j / 2 j innerOddAssign innerEvenAssign: srli t3, t4, 1 # let t3 be parent = j / 2 - 1 addi t3, t3, -1 j innerOddAssignSkip innerOddAssign: srli t3, t4, 1 # let t3 be parent = j / 2 innerOddAssignSkip: slli t3, t3, 2 # t3 = parent * 4 add t3, a0, t3 # t3 = (nums + 4 * parent) slli s11, t4, 2 # let s11 = 4 * j add s11, a0, s11 # s11 = nums + 4j lw s11, 0(s11) # s11 = nums[j] sw s11, 0(t3) # nums[parent] = nums[j] slli t4, t4, 1 # j = j * 2 + 1 addi t4, t4, 1 j heapifyWhile assignCurrVal: andi t3, t4, 1 # if (j % 2 == 0) then parent = j / 2 - 1 beq t3, zero, evenAssign # else if (j % 2 == 1) then parent = j / 2 j oddAssign evenAssign: srli t3, t4, 1 # let t3 be parent = j / 2 - 1 addi t3, t3, -1 j oddAssignSkip oddAssign: srli t3, t4, 1 # let t3 be parent = j / 2 oddAssignSkip: slli t3, t3, 2 # t3 = parent * 4 add t3, a0, t3 # t3 = (nums + 4 * parent) sw t5, 0(t3) # nums[parent] = currValue lw s10, 0(sp) # restore s10 lw s11, 4(sp) # restore s11 addi sp, sp, 8 jr ra swap: # void swam(*x, *y) addi sp, sp, -8 # store s0, s1 sw s0, 0(sp) # store s0 sw s1, 4(sp) # store s1 lw s0, 0(a0) # s0 = nums[0] lw s1, 0(a1) # s1 = nums[i] sw s0, 0(a1) # nums[i] = nums[0] sw s1, 0(a0) # nums[0] = nums[i] lw s0, 0(sp) # restore s0 lw s1, 4(sp) # restore s1 addi sp, sp, 8 jr ra printArray: # void printArray(*nums, numsSize), nums:a0, numsSize:a1 addi sp, sp, -12 # store ra, s0, s1 sw ra, 0(sp) # store ra sw s0, 4(sp) # store s0 sw s1, 8(sp) # store s1 mv s0, a0 # keep nums head addr in s0 mv s1, a1 # keep numsSize in s1 addi t0, zero, 0 # i = 0 mv t1, a0 # keep nums head addr in t1 printArrayWhile: blt t0, a1, printArraySkip # while i < numsSize, then do print array mv a0, s0 # restore nums head addr mv a1, s1 # restore numsSize lw ra, 0(sp) # restore ra lw s0, 4(sp) # restore s0 lw s1, 8(sp) # restore s1 addi sp, sp, 12 jr ra # return if i !< numsSize printArraySkip: lw a0, 0(t1) # a0 = t1[i] addi a7, zero, 1 # print number ecall addi t0, t0, 1 # i++ addi t1, t1, 4 # t1 += 4, means iterate to next element in int array j printArrayWhile autoTest: addi sp, sp, -4 # store s0 sw s0, 0(sp) # store s0 la s0, ans1 lw s0, 0(s0) beq s0, a0, autoTestAccepted # if ans == your answer then accepted j autoTestFailed autoTestAccepted: la a0, Accepted addi a7, zero, 4 ecall j autoTestSkip autoTestFailed: la a0, Failed addi a7, zero, 4 ecall autoTestSkip: lw s0, 0(s0) # restore s0 addi sp, sp, 4 # restore s0 jr ra end: addi a7, zero, 10 ecall ``` #### Observation - Register Used: 16 - sx register: s0 s1 s2 s3 s4 s10 s11 - tx register: t0 t1 t2 t3 t4 t5 t6 - ax register: a0 a7 - other: ra zero ra - Stack used(bytes): 0 ### -O0 No Optimized Assembly Code #### Terminal command ```shell riscv-none-elf-gcc -march=rv32i -mabi=ilp32 -O0 -o ContainDuplicate_0 tests/ContainDuplicate.c ``` ```clike= 00010184 <swap>: 10184: fd010113 addi sp,sp,-48 10188: 02812623 sw s0,44(sp) 1018c: 03010413 addi s0,sp,48 10190: fca42e23 sw a0,-36(s0) 10194: fcb42c23 sw a1,-40(s0) 10198: fdc42783 lw a5,-36(s0) 1019c: 0007a783 lw a5,0(a5) 101a0: fef42623 sw a5,-20(s0) 101a4: fd842783 lw a5,-40(s0) 101a8: 0007a703 lw a4,0(a5) 101ac: fdc42783 lw a5,-36(s0) 101b0: 00e7a023 sw a4,0(a5) 101b4: fd842783 lw a5,-40(s0) 101b8: fec42703 lw a4,-20(s0) 101bc: 00e7a023 sw a4,0(a5) 101c0: 00000013 nop 101c4: 02c12403 lw s0,44(sp) 101c8: 03010113 addi sp,sp,48 101cc: 00008067 ret 000101d0 <heapify>: 101d0: fd010113 addi sp,sp,-48 101d4: 02812623 sw s0,44(sp) 101d8: 03010413 addi s0,sp,48 101dc: fca42e23 sw a0,-36(s0) 101e0: fcb42c23 sw a1,-40(s0) 101e4: fcc42a23 sw a2,-44(s0) 101e8: fd842783 lw a5,-40(s0) 101ec: 00279793 slli a5,a5,0x2 101f0: fdc42703 lw a4,-36(s0) 101f4: 00f707b3 add a5,a4,a5 101f8: 0007a783 lw a5,0(a5) 101fc: fef42223 sw a5,-28(s0) 10200: fd842783 lw a5,-40(s0) 10204: 00179793 slli a5,a5,0x1 10208: 00178793 addi a5,a5,1 1020c: fef42623 sw a5,-20(s0) 10210: 0e00006f j 102f0 <heapify+0x120> 10214: fd442783 lw a5,-44(s0) 10218: fff78793 addi a5,a5,-1 1021c: fec42703 lw a4,-20(s0) 10220: 04f75063 bge a4,a5,10260 <heapify+0x90> 10224: fec42783 lw a5,-20(s0) 10228: 00279793 slli a5,a5,0x2 1022c: fdc42703 lw a4,-36(s0) 10230: 00f707b3 add a5,a4,a5 10234: 0007a703 lw a4,0(a5) 10238: fec42783 lw a5,-20(s0) 1023c: 00178793 addi a5,a5,1 10240: 00279793 slli a5,a5,0x2 10244: fdc42683 lw a3,-36(s0) 10248: 00f687b3 add a5,a3,a5 1024c: 0007a783 lw a5,0(a5) 10250: 00f75863 bge a4,a5,10260 <heapify+0x90> 10254: fec42783 lw a5,-20(s0) 10258: 00178793 addi a5,a5,1 1025c: fef42623 sw a5,-20(s0) 10260: fec42783 lw a5,-20(s0) 10264: 00279793 slli a5,a5,0x2 10268: fdc42703 lw a4,-36(s0) 1026c: 00f707b3 add a5,a4,a5 10270: 0007a783 lw a5,0(a5) 10274: fe442703 lw a4,-28(s0) 10278: 08f75463 bge a4,a5,10300 <heapify+0x130> 1027c: fec42783 lw a5,-20(s0) 10280: 0017f793 andi a5,a5,1 10284: 02079063 bnez a5,102a4 <heapify+0xd4> 10288: fec42783 lw a5,-20(s0) 1028c: 01f7d713 srli a4,a5,0x1f 10290: 00f707b3 add a5,a4,a5 10294: 4017d793 srai a5,a5,0x1 10298: fff78793 addi a5,a5,-1 1029c: fef42423 sw a5,-24(s0) 102a0: 0180006f j 102b8 <heapify+0xe8> 102a4: fec42783 lw a5,-20(s0) 102a8: 01f7d713 srli a4,a5,0x1f 102ac: 00f707b3 add a5,a4,a5 102b0: 4017d793 srai a5,a5,0x1 102b4: fef42423 sw a5,-24(s0) 102b8: fec42783 lw a5,-20(s0) 102bc: 00279793 slli a5,a5,0x2 102c0: fdc42703 lw a4,-36(s0) 102c4: 00f70733 add a4,a4,a5 102c8: fe842783 lw a5,-24(s0) 102cc: 00279793 slli a5,a5,0x2 102d0: fdc42683 lw a3,-36(s0) 102d4: 00f687b3 add a5,a3,a5 102d8: 00072703 lw a4,0(a4) 102dc: 00e7a023 sw a4,0(a5) 102e0: fec42783 lw a5,-20(s0) 102e4: 00179793 slli a5,a5,0x1 102e8: 00178793 addi a5,a5,1 102ec: fef42623 sw a5,-20(s0) 102f0: fec42703 lw a4,-20(s0) 102f4: fd442783 lw a5,-44(s0) 102f8: f0f74ee3 blt a4,a5,10214 <heapify+0x44> 102fc: 0080006f j 10304 <heapify+0x134> 10300: 00000013 nop 10304: fec42783 lw a5,-20(s0) 10308: 0017f793 andi a5,a5,1 1030c: 02079063 bnez a5,1032c <heapify+0x15c> 10310: fec42783 lw a5,-20(s0) 10314: 01f7d713 srli a4,a5,0x1f 10318: 00f707b3 add a5,a4,a5 1031c: 4017d793 srai a5,a5,0x1 10320: fff78793 addi a5,a5,-1 10324: fef42423 sw a5,-24(s0) 10328: 0180006f j 10340 <heapify+0x170> 1032c: fec42783 lw a5,-20(s0) 10330: 01f7d713 srli a4,a5,0x1f 10334: 00f707b3 add a5,a4,a5 10338: 4017d793 srai a5,a5,0x1 1033c: fef42423 sw a5,-24(s0) 10340: fe842783 lw a5,-24(s0) 10344: 00279793 slli a5,a5,0x2 10348: fdc42703 lw a4,-36(s0) 1034c: 00f707b3 add a5,a4,a5 10350: fe442703 lw a4,-28(s0) 10354: 00e7a023 sw a4,0(a5) 10358: 00000013 nop 1035c: 02c12403 lw s0,44(sp) 10360: 03010113 addi sp,sp,48 10364: 00008067 ret 00010368 <heapSort>: 10368: fd010113 addi sp,sp,-48 1036c: 02112623 sw ra,44(sp) 10370: 02812423 sw s0,40(sp) 10374: 03010413 addi s0,sp,48 10378: fca42e23 sw a0,-36(s0) 1037c: fcb42c23 sw a1,-40(s0) 10380: fd842783 lw a5,-40(s0) 10384: 01f7d713 srli a4,a5,0x1f 10388: 00f707b3 add a5,a4,a5 1038c: 4017d793 srai a5,a5,0x1 10390: fff78793 addi a5,a5,-1 10394: fef42623 sw a5,-20(s0) 10398: 0200006f j 103b8 <heapSort+0x50> 1039c: fd842603 lw a2,-40(s0) 103a0: fec42583 lw a1,-20(s0) 103a4: fdc42503 lw a0,-36(s0) 103a8: e29ff0ef jal ra,101d0 <heapify> 103ac: fec42783 lw a5,-20(s0) 103b0: fff78793 addi a5,a5,-1 103b4: fef42623 sw a5,-20(s0) 103b8: fec42783 lw a5,-20(s0) 103bc: fe07d0e3 bgez a5,1039c <heapSort+0x34> 103c0: fd842783 lw a5,-40(s0) 103c4: fff78793 addi a5,a5,-1 103c8: fef42423 sw a5,-24(s0) 103cc: 03c0006f j 10408 <heapSort+0xa0> 103d0: fe842783 lw a5,-24(s0) 103d4: 00279793 slli a5,a5,0x2 103d8: fdc42703 lw a4,-36(s0) 103dc: 00f707b3 add a5,a4,a5 103e0: 00078593 mv a1,a5 103e4: fdc42503 lw a0,-36(s0) 103e8: d9dff0ef jal ra,10184 <swap> 103ec: fe842603 lw a2,-24(s0) 103f0: 00000593 li a1,0 103f4: fdc42503 lw a0,-36(s0) 103f8: dd9ff0ef jal ra,101d0 <heapify> 103fc: fe842783 lw a5,-24(s0) 10400: fff78793 addi a5,a5,-1 10404: fef42423 sw a5,-24(s0) 10408: fe842783 lw a5,-24(s0) 1040c: fc07d2e3 bgez a5,103d0 <heapSort+0x68> 10410: 00000013 nop 10414: 00000013 nop 10418: 02c12083 lw ra,44(sp) 1041c: 02812403 lw s0,40(sp) 10420: 03010113 addi sp,sp,48 10424: 00008067 ret 00010428 <containsDuplicate>: 10428: fd010113 addi sp,sp,-48 1042c: 02112623 sw ra,44(sp) 10430: 02812423 sw s0,40(sp) 10434: 03010413 addi s0,sp,48 10438: fca42e23 sw a0,-36(s0) 1043c: fcb42c23 sw a1,-40(s0) 10440: fd842583 lw a1,-40(s0) 10444: fdc42503 lw a0,-36(s0) 10448: f21ff0ef jal ra,10368 <heapSort> 1044c: fe042623 sw zero,-20(s0) 10450: 0480006f j 10498 <containsDuplicate+0x70> 10454: fec42783 lw a5,-20(s0) 10458: 00279793 slli a5,a5,0x2 1045c: fdc42703 lw a4,-36(s0) 10460: 00f707b3 add a5,a4,a5 10464: 0007a703 lw a4,0(a5) 10468: fec42783 lw a5,-20(s0) 1046c: 00178793 addi a5,a5,1 10470: 00279793 slli a5,a5,0x2 10474: fdc42683 lw a3,-36(s0) 10478: 00f687b3 add a5,a3,a5 1047c: 0007a783 lw a5,0(a5) 10480: 00f71663 bne a4,a5,1048c <containsDuplicate+0x64> 10484: 00100793 li a5,1 10488: 0240006f j 104ac <containsDuplicate+0x84> 1048c: fec42783 lw a5,-20(s0) 10490: 00178793 addi a5,a5,1 10494: fef42623 sw a5,-20(s0) 10498: fd842783 lw a5,-40(s0) 1049c: fff78793 addi a5,a5,-1 104a0: fec42703 lw a4,-20(s0) 104a4: faf748e3 blt a4,a5,10454 <containsDuplicate+0x2c> 104a8: 00000793 li a5,0 104ac: 00078513 mv a0,a5 104b0: 02c12083 lw ra,44(sp) 104b4: 02812403 lw s0,40(sp) 104b8: 03010113 addi sp,sp,48 104bc: 00008067 ret 000104c0 <main>: 104c0: fa010113 addi sp,sp,-96 104c4: 04112e23 sw ra,92(sp) 104c8: 04812c23 sw s0,88(sp) 104cc: 06010413 addi s0,sp,96 104d0: 000227b7 lui a5,0x22 104d4: c5078793 addi a5,a5,-944 # 21c50 <__clzsi2+0xc0> 104d8: 0007a503 lw a0,0(a5) 104dc: 0047a583 lw a1,4(a5) 104e0: 0087a603 lw a2,8(a5) 104e4: 00c7a683 lw a3,12(a5) 104e8: 0107a703 lw a4,16(a5) 104ec: 0147a783 lw a5,20(a5) 104f0: fca42a23 sw a0,-44(s0) 104f4: fcb42c23 sw a1,-40(s0) 104f8: fcc42e23 sw a2,-36(s0) 104fc: fed42023 sw a3,-32(s0) 10500: fee42223 sw a4,-28(s0) 10504: fef42423 sw a5,-24(s0) 10508: fe0407a3 sb zero,-17(s0) 1050c: 000227b7 lui a5,0x22 10510: c1878513 addi a0,a5,-1000 # 21c18 <__clzsi2+0x88> 10514: 388000ef jal ra,1089c <printf> 10518: fd440793 addi a5,s0,-44 1051c: 00600593 li a1,6 10520: 00078513 mv a0,a5 10524: f05ff0ef jal ra,10428 <containsDuplicate> 10528: 00050793 mv a5,a0 1052c: 00078713 mv a4,a5 10530: fef44783 lbu a5,-17(s0) 10534: 00e79a63 bne a5,a4,10548 <main+0x88> 10538: 000227b7 lui a5,0x22 1053c: c2478513 addi a0,a5,-988 # 21c24 <__clzsi2+0x94> 10540: 4d4000ef jal ra,10a14 <puts> 10544: 0100006f j 10554 <main+0x94> 10548: 000227b7 lui a5,0x22 1054c: c3078513 addi a0,a5,-976 # 21c30 <__clzsi2+0xa0> 10550: 4c4000ef jal ra,10a14 <puts> 10554: 000227b7 lui a5,0x22 10558: c6878793 addi a5,a5,-920 # 21c68 <__clzsi2+0xd8> 1055c: 0007a503 lw a0,0(a5) 10560: 0047a583 lw a1,4(a5) 10564: 0087a603 lw a2,8(a5) 10568: 00c7a683 lw a3,12(a5) 1056c: 0107a703 lw a4,16(a5) 10570: 0147a783 lw a5,20(a5) 10574: faa42e23 sw a0,-68(s0) 10578: fcb42023 sw a1,-64(s0) 1057c: fcc42223 sw a2,-60(s0) 10580: fcd42423 sw a3,-56(s0) 10584: fce42623 sw a4,-52(s0) 10588: fcf42823 sw a5,-48(s0) 1058c: 00100793 li a5,1 10590: fef40723 sb a5,-18(s0) 10594: 000227b7 lui a5,0x22 10598: c3878513 addi a0,a5,-968 # 21c38 <__clzsi2+0xa8> 1059c: 300000ef jal ra,1089c <printf> 105a0: fbc40793 addi a5,s0,-68 105a4: 00600593 li a1,6 105a8: 00078513 mv a0,a5 105ac: e7dff0ef jal ra,10428 <containsDuplicate> 105b0: 00050793 mv a5,a0 105b4: 00078713 mv a4,a5 105b8: fee44783 lbu a5,-18(s0) 105bc: 00e79a63 bne a5,a4,105d0 <main+0x110> 105c0: 000227b7 lui a5,0x22 105c4: c2478513 addi a0,a5,-988 # 21c24 <__clzsi2+0x94> 105c8: 44c000ef jal ra,10a14 <puts> 105cc: 0100006f j 105dc <main+0x11c> 105d0: 000227b7 lui a5,0x22 105d4: c3078513 addi a0,a5,-976 # 21c30 <__clzsi2+0xa0> 105d8: 43c000ef jal ra,10a14 <puts> 105dc: 000227b7 lui a5,0x22 105e0: c8078793 addi a5,a5,-896 # 21c80 <__clzsi2+0xf0> 105e4: 0007a503 lw a0,0(a5) 105e8: 0047a583 lw a1,4(a5) 105ec: 0087a603 lw a2,8(a5) 105f0: 00c7a683 lw a3,12(a5) 105f4: 0107a703 lw a4,16(a5) 105f8: 0147a783 lw a5,20(a5) 105fc: faa42223 sw a0,-92(s0) 10600: fab42423 sw a1,-88(s0) 10604: fac42623 sw a2,-84(s0) 10608: fad42823 sw a3,-80(s0) 1060c: fae42a23 sw a4,-76(s0) 10610: faf42c23 sw a5,-72(s0) 10614: 00100793 li a5,1 10618: fef406a3 sb a5,-19(s0) 1061c: 000227b7 lui a5,0x22 10620: c4478513 addi a0,a5,-956 # 21c44 <__clzsi2+0xb4> 10624: 278000ef jal ra,1089c <printf> 10628: fa440793 addi a5,s0,-92 1062c: 00600593 li a1,6 10630: 00078513 mv a0,a5 10634: df5ff0ef jal ra,10428 <containsDuplicate> 10638: 00050793 mv a5,a0 1063c: 00078713 mv a4,a5 10640: fed44783 lbu a5,-19(s0) 10644: 00e79a63 bne a5,a4,10658 <main+0x198> 10648: 000227b7 lui a5,0x22 1064c: c2478513 addi a0,a5,-988 # 21c24 <__clzsi2+0x94> 10650: 3c4000ef jal ra,10a14 <puts> 10654: 0100006f j 10664 <main+0x1a4> 10658: 000227b7 lui a5,0x22 1065c: c3078513 addi a0,a5,-976 # 21c30 <__clzsi2+0xa0> 10660: 3b4000ef jal ra,10a14 <puts> 10664: 00000793 li a5,0 10668: 00078513 mv a0,a5 1066c: 05c12083 lw ra,92(sp) 10670: 05812403 lw s0,88(sp) 10674: 06010113 addi sp,sp,96 10678: 00008067 ret ``` #### Size of Assembly code: ![](https://i.imgur.com/mSaNRIS.png) #### Obsevation: - Register Used: 10 - sx register: s0 s1 s2 s3 - tx register: None - ax register: a0 a1 a2 a3 a4 a5 - other: sp ra - Stack used(bytes): 96 (main) ### -O1 Optimized Assembly Code #### Terminal command ```shell riscv-none-elf-gcc -march=rv32i -mabi=ilp32 -O1 -o ContainDuplicate_1 tests/ContainDuplicate.c ``` ```clike= 00010184 <swap>: 10184: 00052783 lw a5,0(a0) 10188: 0005a703 lw a4,0(a1) 1018c: 00e52023 sw a4,0(a0) 10190: 00f5a023 sw a5,0(a1) 10194: 00008067 ret 00010198 <heapify>: 10198: 00259793 slli a5,a1,0x2 1019c: 00f507b3 add a5,a0,a5 101a0: 0007a803 lw a6,0(a5) 101a4: 00159593 slli a1,a1,0x1 101a8: 00158793 addi a5,a1,1 101ac: 06c7dc63 bge a5,a2,10224 <heapify+0x8c> 101b0: fff60593 addi a1,a2,-1 101b4: 0280006f j 101dc <heapify+0x44> 101b8: 01f7d713 srli a4,a5,0x1f 101bc: 00f70733 add a4,a4,a5 101c0: 40175713 srai a4,a4,0x1 101c4: 00271713 slli a4,a4,0x2 101c8: 00e50733 add a4,a0,a4 101cc: 00d72023 sw a3,0(a4) 101d0: 00179793 slli a5,a5,0x1 101d4: 00178793 addi a5,a5,1 101d8: 04c7d663 bge a5,a2,10224 <heapify+0x8c> 101dc: 00b7de63 bge a5,a1,101f8 <heapify+0x60> 101e0: 00279713 slli a4,a5,0x2 101e4: 00e50733 add a4,a0,a4 101e8: 00072683 lw a3,0(a4) 101ec: 00472703 lw a4,4(a4) 101f0: 00e6a733 slt a4,a3,a4 101f4: 00e787b3 add a5,a5,a4 101f8: 00279713 slli a4,a5,0x2 101fc: 00e50733 add a4,a0,a4 10200: 00072683 lw a3,0(a4) 10204: 02d85063 bge a6,a3,10224 <heapify+0x8c> 10208: 0017f713 andi a4,a5,1 1020c: fa0716e3 bnez a4,101b8 <heapify+0x20> 10210: 01f7d713 srli a4,a5,0x1f 10214: 00f70733 add a4,a4,a5 10218: 40175713 srai a4,a4,0x1 1021c: fff70713 addi a4,a4,-1 10220: fa5ff06f j 101c4 <heapify+0x2c> 10224: 0017f713 andi a4,a5,1 10228: 02071263 bnez a4,1024c <heapify+0xb4> 1022c: 01f7d713 srli a4,a5,0x1f 10230: 00f707b3 add a5,a4,a5 10234: 4017d793 srai a5,a5,0x1 10238: fff78793 addi a5,a5,-1 1023c: 00279793 slli a5,a5,0x2 10240: 00f50533 add a0,a0,a5 10244: 01052023 sw a6,0(a0) 10248: 00008067 ret 1024c: 01f7d713 srli a4,a5,0x1f 10250: 00f707b3 add a5,a4,a5 10254: 4017d793 srai a5,a5,0x1 10258: fe5ff06f j 1023c <heapify+0xa4> 0001025c <heapSort>: 1025c: fe010113 addi sp,sp,-32 10260: 00112e23 sw ra,28(sp) 10264: 00812c23 sw s0,24(sp) 10268: 00912a23 sw s1,20(sp) 1026c: 01212823 sw s2,16(sp) 10270: 01312623 sw s3,12(sp) 10274: 00050913 mv s2,a0 10278: 00058413 mv s0,a1 1027c: 01f5d493 srli s1,a1,0x1f 10280: 00b484b3 add s1,s1,a1 10284: 4014d493 srai s1,s1,0x1 10288: fff48493 addi s1,s1,-1 1028c: 0204c063 bltz s1,102ac <heapSort+0x50> 10290: fff00993 li s3,-1 10294: 00040613 mv a2,s0 10298: 00048593 mv a1,s1 1029c: 00090513 mv a0,s2 102a0: ef9ff0ef jal ra,10198 <heapify> 102a4: fff48493 addi s1,s1,-1 102a8: ff3496e3 bne s1,s3,10294 <heapSort+0x38> 102ac: fff40493 addi s1,s0,-1 102b0: 0204ce63 bltz s1,102ec <heapSort+0x90> 102b4: 00241413 slli s0,s0,0x2 102b8: 00890433 add s0,s2,s0 102bc: fff00993 li s3,-1 102c0: 00092783 lw a5,0(s2) 102c4: ffc42703 lw a4,-4(s0) 102c8: 00e92023 sw a4,0(s2) 102cc: fef42e23 sw a5,-4(s0) 102d0: 00048613 mv a2,s1 102d4: 00000593 li a1,0 102d8: 00090513 mv a0,s2 102dc: ebdff0ef jal ra,10198 <heapify> 102e0: fff48493 addi s1,s1,-1 102e4: ffc40413 addi s0,s0,-4 102e8: fd349ce3 bne s1,s3,102c0 <heapSort+0x64> 102ec: 01c12083 lw ra,28(sp) 102f0: 01812403 lw s0,24(sp) 102f4: 01412483 lw s1,20(sp) 102f8: 01012903 lw s2,16(sp) 102fc: 00c12983 lw s3,12(sp) 10300: 02010113 addi sp,sp,32 10304: 00008067 ret 00010308 <containsDuplicate>: 10308: ff010113 addi sp,sp,-16 1030c: 00112623 sw ra,12(sp) 10310: 00812423 sw s0,8(sp) 10314: 00912223 sw s1,4(sp) 10318: 00050413 mv s0,a0 1031c: 00058493 mv s1,a1 10320: f3dff0ef jal ra,1025c <heapSort> 10324: 00100793 li a5,1 10328: 0297d863 bge a5,s1,10358 <containsDuplicate+0x50> 1032c: 00040513 mv a0,s0 10330: fff48593 addi a1,s1,-1 10334: 00000793 li a5,0 10338: 00052683 lw a3,0(a0) 1033c: 00452703 lw a4,4(a0) 10340: 02e68063 beq a3,a4,10360 <containsDuplicate+0x58> 10344: 00178793 addi a5,a5,1 10348: 00450513 addi a0,a0,4 1034c: feb796e3 bne a5,a1,10338 <containsDuplicate+0x30> 10350: 00000513 li a0,0 10354: 0100006f j 10364 <containsDuplicate+0x5c> 10358: 00000513 li a0,0 1035c: 0080006f j 10364 <containsDuplicate+0x5c> 10360: 00100513 li a0,1 10364: 00c12083 lw ra,12(sp) 10368: 00812403 lw s0,8(sp) 1036c: 00412483 lw s1,4(sp) 10370: 01010113 addi sp,sp,16 10374: 00008067 ret 00010378 <main>: 10378: fa010113 addi sp,sp,-96 1037c: 04112e23 sw ra,92(sp) 10380: 000227b7 lui a5,0x22 10384: ab878793 addi a5,a5,-1352 # 21ab8 <__clzsi2+0xc4> 10388: 0007a503 lw a0,0(a5) 1038c: 0047a583 lw a1,4(a5) 10390: 0087a603 lw a2,8(a5) 10394: 00c7a683 lw a3,12(a5) 10398: 0107a703 lw a4,16(a5) 1039c: 0147a783 lw a5,20(a5) 103a0: 02a12c23 sw a0,56(sp) 103a4: 02b12e23 sw a1,60(sp) 103a8: 04c12023 sw a2,64(sp) 103ac: 04d12223 sw a3,68(sp) 103b0: 04e12423 sw a4,72(sp) 103b4: 04f12623 sw a5,76(sp) 103b8: 00022537 lui a0,0x22 103bc: a8050513 addi a0,a0,-1408 # 21a80 <__clzsi2+0x8c> 103c0: 340000ef jal ra,10700 <printf> 103c4: 00600593 li a1,6 103c8: 03810513 addi a0,sp,56 103cc: f3dff0ef jal ra,10308 <containsDuplicate> 103d0: 0e051063 bnez a0,104b0 <main+0x138> 103d4: 00022537 lui a0,0x22 103d8: a8c50513 addi a0,a0,-1396 # 21a8c <__clzsi2+0x98> 103dc: 49c000ef jal ra,10878 <puts> 103e0: 000227b7 lui a5,0x22 103e4: ab878793 addi a5,a5,-1352 # 21ab8 <__clzsi2+0xc4> 103e8: 0187a503 lw a0,24(a5) 103ec: 01c7a583 lw a1,28(a5) 103f0: 0207a603 lw a2,32(a5) 103f4: 0247a683 lw a3,36(a5) 103f8: 0287a703 lw a4,40(a5) 103fc: 02c7a783 lw a5,44(a5) 10400: 02a12023 sw a0,32(sp) 10404: 02b12223 sw a1,36(sp) 10408: 02c12423 sw a2,40(sp) 1040c: 02d12623 sw a3,44(sp) 10410: 02e12823 sw a4,48(sp) 10414: 02f12a23 sw a5,52(sp) 10418: 00022537 lui a0,0x22 1041c: aa050513 addi a0,a0,-1376 # 21aa0 <__clzsi2+0xac> 10420: 2e0000ef jal ra,10700 <printf> 10424: 00600593 li a1,6 10428: 02010513 addi a0,sp,32 1042c: eddff0ef jal ra,10308 <containsDuplicate> 10430: 08050863 beqz a0,104c0 <main+0x148> 10434: 00022537 lui a0,0x22 10438: a8c50513 addi a0,a0,-1396 # 21a8c <__clzsi2+0x98> 1043c: 43c000ef jal ra,10878 <puts> 10440: 000227b7 lui a5,0x22 10444: ab878793 addi a5,a5,-1352 # 21ab8 <__clzsi2+0xc4> 10448: 0307a503 lw a0,48(a5) 1044c: 0347a583 lw a1,52(a5) 10450: 0387a603 lw a2,56(a5) 10454: 03c7a683 lw a3,60(a5) 10458: 0407a703 lw a4,64(a5) 1045c: 0447a783 lw a5,68(a5) 10460: 00a12423 sw a0,8(sp) 10464: 00b12623 sw a1,12(sp) 10468: 00c12823 sw a2,16(sp) 1046c: 00d12a23 sw a3,20(sp) 10470: 00e12c23 sw a4,24(sp) 10474: 00f12e23 sw a5,28(sp) 10478: 00022537 lui a0,0x22 1047c: aac50513 addi a0,a0,-1364 # 21aac <__clzsi2+0xb8> 10480: 280000ef jal ra,10700 <printf> 10484: 00600593 li a1,6 10488: 00810513 addi a0,sp,8 1048c: e7dff0ef jal ra,10308 <containsDuplicate> 10490: 04050063 beqz a0,104d0 <main+0x158> 10494: 00022537 lui a0,0x22 10498: a8c50513 addi a0,a0,-1396 # 21a8c <__clzsi2+0x98> 1049c: 3dc000ef jal ra,10878 <puts> 104a0: 00000513 li a0,0 104a4: 05c12083 lw ra,92(sp) 104a8: 06010113 addi sp,sp,96 104ac: 00008067 ret 104b0: 00022537 lui a0,0x22 104b4: a9850513 addi a0,a0,-1384 # 21a98 <__clzsi2+0xa4> 104b8: 3c0000ef jal ra,10878 <puts> 104bc: f25ff06f j 103e0 <main+0x68> 104c0: 00022537 lui a0,0x22 104c4: a9850513 addi a0,a0,-1384 # 21a98 <__clzsi2+0xa4> 104c8: 3b0000ef jal ra,10878 <puts> 104cc: f75ff06f j 10440 <main+0xc8> 104d0: 00022537 lui a0,0x22 104d4: a9850513 addi a0,a0,-1384 # 21a98 <__clzsi2+0xa4> 104d8: 3a0000ef jal ra,10878 <puts> 104dc: fc5ff06f j 104a0 <main+0x128> ``` #### Size of Assembly code: ![](https://i.imgur.com/yZrC8Nz.png) #### Obsevation: - Register Used: 10 - sx register: s0 s1 s2 s3 - tx register: None - ax register: a0 a1 a2 a3 a4 a5 - other: sp ra - Stack used(bytes): 96 (main) ### -O3 Optimized Assembly Code #### Terminal command ```shell riscv-none-elf-gcc -march=rv32i -mabi=ilp32 -O3 -o ContainDuplicate_3 tests/ContainDuplicate.c ``` ```clike= 000100c4 <main>: 100c4: fa010113 addi sp,sp,-96 100c8: 04812c23 sw s0,88(sp) 100cc: 00022437 lui s0,0x22 100d0: b8040413 addi s0,s0,-1152 # 21b80 <__clzsi2+0xc0> 100d4: 00442583 lw a1,4(s0) 100d8: 00042803 lw a6,0(s0) 100dc: 00842603 lw a2,8(s0) 100e0: 00c42683 lw a3,12(s0) 100e4: 01042703 lw a4,16(s0) 100e8: 01442783 lw a5,20(s0) 100ec: 00022537 lui a0,0x22 100f0: b4850513 addi a0,a0,-1208 # 21b48 <__clzsi2+0x88> 100f4: 04112e23 sw ra,92(sp) 100f8: 00b12623 sw a1,12(sp) 100fc: 01012423 sw a6,8(sp) 10100: 00c12823 sw a2,16(sp) 10104: 00d12a23 sw a3,20(sp) 10108: 00e12c23 sw a4,24(sp) 1010c: 00f12e23 sw a5,28(sp) 10110: 6bc000ef jal ra,107cc <printf> 10114: 00600593 li a1,6 10118: 00810513 addi a0,sp,8 1011c: 410000ef jal ra,1052c <containsDuplicate> 10120: 0c051a63 bnez a0,101f4 <main+0x130> 10124: 00022537 lui a0,0x22 10128: b5450513 addi a0,a0,-1196 # 21b54 <__clzsi2+0x94> 1012c: 019000ef jal ra,10944 <puts> 10130: 01c42583 lw a1,28(s0) 10134: 01842803 lw a6,24(s0) 10138: 02042603 lw a2,32(s0) 1013c: 02442683 lw a3,36(s0) 10140: 02842703 lw a4,40(s0) 10144: 02c42783 lw a5,44(s0) 10148: 00022537 lui a0,0x22 1014c: b6850513 addi a0,a0,-1176 # 21b68 <__clzsi2+0xa8> 10150: 02b12223 sw a1,36(sp) 10154: 03012023 sw a6,32(sp) 10158: 02c12423 sw a2,40(sp) 1015c: 02d12623 sw a3,44(sp) 10160: 02e12823 sw a4,48(sp) 10164: 02f12a23 sw a5,52(sp) 10168: 664000ef jal ra,107cc <printf> 1016c: 00600593 li a1,6 10170: 02010513 addi a0,sp,32 10174: 3b8000ef jal ra,1052c <containsDuplicate> 10178: 08050e63 beqz a0,10214 <main+0x150> 1017c: 00022537 lui a0,0x22 10180: b5450513 addi a0,a0,-1196 # 21b54 <__clzsi2+0x94> 10184: 7c0000ef jal ra,10944 <puts> 10188: 03442583 lw a1,52(s0) 1018c: 03042803 lw a6,48(s0) 10190: 03842603 lw a2,56(s0) 10194: 03c42683 lw a3,60(s0) 10198: 04042703 lw a4,64(s0) 1019c: 04442783 lw a5,68(s0) 101a0: 00022537 lui a0,0x22 101a4: b7450513 addi a0,a0,-1164 # 21b74 <__clzsi2+0xb4> 101a8: 02b12e23 sw a1,60(sp) 101ac: 03012c23 sw a6,56(sp) 101b0: 04c12023 sw a2,64(sp) 101b4: 04d12223 sw a3,68(sp) 101b8: 04e12423 sw a4,72(sp) 101bc: 04f12623 sw a5,76(sp) 101c0: 60c000ef jal ra,107cc <printf> 101c4: 00600593 li a1,6 101c8: 03810513 addi a0,sp,56 101cc: 360000ef jal ra,1052c <containsDuplicate> 101d0: 02050a63 beqz a0,10204 <main+0x140> 101d4: 00022537 lui a0,0x22 101d8: b5450513 addi a0,a0,-1196 # 21b54 <__clzsi2+0x94> 101dc: 768000ef jal ra,10944 <puts> 101e0: 05c12083 lw ra,92(sp) 101e4: 05812403 lw s0,88(sp) 101e8: 00000513 li a0,0 101ec: 06010113 addi sp,sp,96 101f0: 00008067 ret 101f4: 00022537 lui a0,0x22 101f8: b6050513 addi a0,a0,-1184 # 21b60 <__clzsi2+0xa0> 101fc: 748000ef jal ra,10944 <puts> 10200: f31ff06f j 10130 <main+0x6c> 10204: 00022537 lui a0,0x22 10208: b6050513 addi a0,a0,-1184 # 21b60 <__clzsi2+0xa0> 1020c: 738000ef jal ra,10944 <puts> 10210: fd1ff06f j 101e0 <main+0x11c> 10214: 00022537 lui a0,0x22 10218: b6050513 addi a0,a0,-1184 # 21b60 <__clzsi2+0xa0> 1021c: 728000ef jal ra,10944 <puts> 10220: f69ff06f j 10188 <main+0xc4> 000102e4 <swap>: 102e4: 0005a703 lw a4,0(a1) 102e8: 00052783 lw a5,0(a0) 102ec: 00e52023 sw a4,0(a0) 102f0: 00f5a023 sw a5,0(a1) 102f4: 00008067 ret 000102f8 <heapify>: 102f8: 00259793 slli a5,a1,0x2 102fc: 00159593 slli a1,a1,0x1 10300: 00f507b3 add a5,a0,a5 10304: 00158713 addi a4,a1,1 10308: 0007ae03 lw t3,0(a5) 1030c: fff60e93 addi t4,a2,-1 10310: 00c74a63 blt a4,a2,10324 <heapify+0x2c> 10314: 0780006f j 1038c <heapify+0x94> 10318: 00130713 addi a4,t1,1 # 102d9 <frame_dummy+0x15> 1031c: 00d7a023 sw a3,0(a5) 10320: 06c75663 bge a4,a2,1038c <heapify+0x94> 10324: 00271793 slli a5,a4,0x2 10328: 00f507b3 add a5,a0,a5 1032c: 0007a683 lw a3,0(a5) 10330: 01d75a63 bge a4,t4,10344 <heapify+0x4c> 10334: 0047a783 lw a5,4(a5) 10338: 00f6d663 bge a3,a5,10344 <heapify+0x4c> 1033c: 00170713 addi a4,a4,1 10340: 00078693 mv a3,a5 10344: 01f75593 srli a1,a4,0x1f 10348: 00e587b3 add a5,a1,a4 1034c: 00177813 andi a6,a4,1 10350: 00183893 seqz a7,a6 10354: 4017d793 srai a5,a5,0x1 10358: 411787b3 sub a5,a5,a7 1035c: 00279793 slli a5,a5,0x2 10360: 00171313 slli t1,a4,0x1 10364: 00f507b3 add a5,a0,a5 10368: fade48e3 blt t3,a3,10318 <heapify+0x20> 1036c: 00e585b3 add a1,a1,a4 10370: 4015d593 srai a1,a1,0x1 10374: 00183813 seqz a6,a6 10378: 410585b3 sub a1,a1,a6 1037c: 00259593 slli a1,a1,0x2 10380: 00b50533 add a0,a0,a1 10384: 01c52023 sw t3,0(a0) 10388: 00008067 ret 1038c: 01f75593 srli a1,a4,0x1f 10390: 00177813 andi a6,a4,1 10394: 00e585b3 add a1,a1,a4 10398: 4015d593 srai a1,a1,0x1 1039c: 00183813 seqz a6,a6 103a0: 410585b3 sub a1,a1,a6 103a4: 00259593 slli a1,a1,0x2 103a8: 00b50533 add a0,a0,a1 103ac: 01c52023 sw t3,0(a0) 103b0: 00008067 ret 000103b4 <heapSort>: 103b4: fe010113 addi sp,sp,-32 103b8: 01212823 sw s2,16(sp) 103bc: 01f5d913 srli s2,a1,0x1f 103c0: 00b90933 add s2,s2,a1 103c4: 40195913 srai s2,s2,0x1 103c8: 00812c23 sw s0,24(sp) 103cc: 00912a23 sw s1,20(sp) 103d0: 01312623 sw s3,12(sp) 103d4: 00112e23 sw ra,28(sp) 103d8: fff90913 addi s2,s2,-1 103dc: 00058493 mv s1,a1 103e0: 00050413 mv s0,a0 103e4: fff00993 li s3,-1 103e8: 00094e63 bltz s2,10404 <heapSort+0x50> 103ec: 00090593 mv a1,s2 103f0: 00048613 mv a2,s1 103f4: fff90913 addi s2,s2,-1 103f8: 00040513 mv a0,s0 103fc: efdff0ef jal ra,102f8 <heapify> 10400: ff3916e3 bne s2,s3,103ec <heapSort+0x38> 10404: fff48813 addi a6,s1,-1 10408: 0c084e63 bltz a6,104e4 <heapSort+0x130> 1040c: 00100793 li a5,1 10410: 0b07d463 bge a5,a6,104b8 <heapSort+0x104> 10414: 00249e13 slli t3,s1,0x2 10418: 00080313 mv t1,a6 1041c: 01c40e33 add t3,s0,t3 10420: 00100e93 li t4,1 10424: ffce2703 lw a4,-4(t3) 10428: 00042783 lw a5,0(s0) 1042c: fff80813 addi a6,a6,-1 10430: 00e42023 sw a4,0(s0) 10434: fefe2e23 sw a5,-4(t3) 10438: 00042883 lw a7,0(s0) 1043c: 00100713 li a4,1 10440: 00c0006f j 1044c <heapSort+0x98> 10444: 00d7a023 sw a3,0(a5) 10448: 0a675c63 bge a4,t1,10500 <heapSort+0x14c> 1044c: 00271793 slli a5,a4,0x2 10450: 00f407b3 add a5,s0,a5 10454: 0007a683 lw a3,0(a5) 10458: 01075a63 bge a4,a6,1046c <heapSort+0xb8> 1045c: 0047a783 lw a5,4(a5) 10460: 00f6d663 bge a3,a5,1046c <heapSort+0xb8> 10464: 00170713 addi a4,a4,1 10468: 00078693 mv a3,a5 1046c: 00177613 andi a2,a4,1 10470: 40175793 srai a5,a4,0x1 10474: 00171513 slli a0,a4,0x1 10478: 00163593 seqz a1,a2 1047c: 00150713 addi a4,a0,1 10480: 00078513 mv a0,a5 10484: 40b787b3 sub a5,a5,a1 10488: 00279793 slli a5,a5,0x2 1048c: 00f407b3 add a5,s0,a5 10490: fad8cae3 blt a7,a3,10444 <heapSort+0x90> 10494: 02061a63 bnez a2,104c8 <heapSort+0x114> 10498: fff50793 addi a5,a0,-1 1049c: 00279793 slli a5,a5,0x2 104a0: 00f407b3 add a5,s0,a5 104a4: 0117a023 sw a7,0(a5) 104a8: fff30313 addi t1,t1,-1 104ac: ffce0e13 addi t3,t3,-4 104b0: f70ecae3 blt t4,a6,10424 <heapSort+0x70> 104b4: 02084863 bltz a6,104e4 <heapSort+0x130> 104b8: 00281793 slli a5,a6,0x2 104bc: 00f407b3 add a5,s0,a5 104c0: 00100613 li a2,1 104c4: 04c0006f j 10510 <heapSort+0x15c> 104c8: 00251513 slli a0,a0,0x2 104cc: 00a40533 add a0,s0,a0 104d0: 01152023 sw a7,0(a0) 104d4: fff30313 addi t1,t1,-1 104d8: ffce0e13 addi t3,t3,-4 104dc: f50ec4e3 blt t4,a6,10424 <heapSort+0x70> 104e0: fc085ce3 bgez a6,104b8 <heapSort+0x104> 104e4: 01c12083 lw ra,28(sp) 104e8: 01812403 lw s0,24(sp) 104ec: 01412483 lw s1,20(sp) 104f0: 01012903 lw s2,16(sp) 104f4: 00c12983 lw s3,12(sp) 104f8: 02010113 addi sp,sp,32 104fc: 00008067 ret 10500: 00177613 andi a2,a4,1 10504: 40175513 srai a0,a4,0x1 10508: f8dff06f j 10494 <heapSort+0xe0> 1050c: 00000813 li a6,0 10510: 0007a683 lw a3,0(a5) 10514: 00042703 lw a4,0(s0) 10518: ffc78793 addi a5,a5,-4 1051c: 00d42023 sw a3,0(s0) 10520: 00e7a223 sw a4,4(a5) 10524: fec804e3 beq a6,a2,1050c <heapSort+0x158> 10528: fbdff06f j 104e4 <heapSort+0x130> 0001052c <containsDuplicate>: 1052c: ff010113 addi sp,sp,-16 10530: 00812423 sw s0,8(sp) 10534: 00912223 sw s1,4(sp) 10538: 00112623 sw ra,12(sp) 1053c: 00058493 mv s1,a1 10540: 00050413 mv s0,a0 10544: e71ff0ef jal ra,103b4 <heapSort> 10548: 00100793 li a5,1 1054c: 0497d463 bge a5,s1,10594 <containsDuplicate+0x68> 10550: 00042783 lw a5,0(s0) 10554: fff48613 addi a2,s1,-1 10558: 00440513 addi a0,s0,4 1055c: 00000713 li a4,0 10560: 0080006f j 10568 <containsDuplicate+0x3c> 10564: 02c70863 beq a4,a2,10594 <containsDuplicate+0x68> 10568: 00078693 mv a3,a5 1056c: 00052783 lw a5,0(a0) 10570: 00170713 addi a4,a4,1 10574: 00450513 addi a0,a0,4 10578: fed796e3 bne a5,a3,10564 <containsDuplicate+0x38> 1057c: 00c12083 lw ra,12(sp) 10580: 00812403 lw s0,8(sp) 10584: 00412483 lw s1,4(sp) 10588: 00100513 li a0,1 1058c: 01010113 addi sp,sp,16 10590: 00008067 ret 10594: 00c12083 lw ra,12(sp) 10598: 00812403 lw s0,8(sp) 1059c: 00412483 lw s1,4(sp) 105a0: 00000513 li a0,0 105a4: 01010113 addi sp,sp,16 105a8: 00008067 ret ``` #### Size of Assembly code: ![](https://i.imgur.com/rhOPc26.png) #### Obsevation: - Register Used: 12 - sx register: s0 s1 s2 s3 - tx register: None - ax register: a0 a1 a2 a3 a4 a5 a6 a7 - other: None - Stack used(bytes): 96 ### -Os Optimized Assembly Code #### Terminal command ```shell riscv-none-elf-gcc -march=rv32i -mabi=ilp32 -Os -o ContainDuplicate_1 tests/ContainDuplicate_s.c ``` ```clike= 000100c4 <main>: 100c4: fa010113 addi sp,sp,-96 100c8: 04812c23 sw s0,88(sp) 100cc: 00022437 lui s0,0x22 100d0: a0840593 addi a1,s0,-1528 # 21a08 <__clzsi2+0xc4> 100d4: 01800613 li a2,24 100d8: 00810513 addi a0,sp,8 100dc: 04112e23 sw ra,92(sp) 100e0: 454000ef jal ra,10534 <memcpy> 100e4: 00022537 lui a0,0x22 100e8: 9d050513 addi a0,a0,-1584 # 219d0 <__clzsi2+0x8c> 100ec: 708000ef jal ra,107f4 <printf> 100f0: 00600593 li a1,6 100f4: 00810513 addi a0,sp,8 100f8: 2d4000ef jal ra,103cc <containsDuplicate> 100fc: 0a051063 bnez a0,1019c <main+0xd8> 10100: 00022537 lui a0,0x22 10104: 9dc50513 addi a0,a0,-1572 # 219dc <__clzsi2+0x98> 10108: 065000ef jal ra,1096c <puts> 1010c: a0840593 addi a1,s0,-1528 10110: 01858593 addi a1,a1,24 10114: 01800613 li a2,24 10118: 02010513 addi a0,sp,32 1011c: 418000ef jal ra,10534 <memcpy> 10120: 00022537 lui a0,0x22 10124: 9f050513 addi a0,a0,-1552 # 219f0 <__clzsi2+0xac> 10128: 6cc000ef jal ra,107f4 <printf> 1012c: 00600593 li a1,6 10130: 02010513 addi a0,sp,32 10134: 298000ef jal ra,103cc <containsDuplicate> 10138: 06050863 beqz a0,101a8 <main+0xe4> 1013c: 00022537 lui a0,0x22 10140: 9dc50513 addi a0,a0,-1572 # 219dc <__clzsi2+0x98> 10144: 029000ef jal ra,1096c <puts> 10148: 000225b7 lui a1,0x22 1014c: a0858593 addi a1,a1,-1528 # 21a08 <__clzsi2+0xc4> 10150: 03058593 addi a1,a1,48 10154: 01800613 li a2,24 10158: 03810513 addi a0,sp,56 1015c: 3d8000ef jal ra,10534 <memcpy> 10160: 00022537 lui a0,0x22 10164: 9fc50513 addi a0,a0,-1540 # 219fc <__clzsi2+0xb8> 10168: 68c000ef jal ra,107f4 <printf> 1016c: 00600593 li a1,6 10170: 03810513 addi a0,sp,56 10174: 258000ef jal ra,103cc <containsDuplicate> 10178: 02050e63 beqz a0,101b4 <main+0xf0> 1017c: 00022537 lui a0,0x22 10180: 9dc50513 addi a0,a0,-1572 # 219dc <__clzsi2+0x98> 10184: 7e8000ef jal ra,1096c <puts> 10188: 05c12083 lw ra,92(sp) 1018c: 05812403 lw s0,88(sp) 10190: 00000513 li a0,0 10194: 06010113 addi sp,sp,96 10198: 00008067 ret 1019c: 00022537 lui a0,0x22 101a0: 9e850513 addi a0,a0,-1560 # 219e8 <__clzsi2+0xa4> 101a4: f65ff06f j 10108 <main+0x44> 101a8: 00022537 lui a0,0x22 101ac: 9e850513 addi a0,a0,-1560 # 219e8 <__clzsi2+0xa4> 101b0: f95ff06f j 10144 <main+0x80> 101b4: 00022537 lui a0,0x22 101b8: 9e850513 addi a0,a0,-1560 # 219e8 <__clzsi2+0xa4> 101bc: fc9ff06f j 10184 <main+0xc0> 00010280 <swap>: 10280: 0005a703 lw a4,0(a1) 10284: 00052783 lw a5,0(a0) 10288: 00e52023 sw a4,0(a0) 1028c: 00f5a023 sw a5,0(a1) 10290: 00008067 ret 00010294 <heapify>: 10294: 00259793 slli a5,a1,0x2 10298: 00f507b3 add a5,a0,a5 1029c: 0007a703 lw a4,0(a5) 102a0: 00159593 slli a1,a1,0x1 102a4: 00158593 addi a1,a1,1 102a8: fff60813 addi a6,a2,-1 102ac: 02c5c663 blt a1,a2,102d8 <heapify+0x44> 102b0: 01f5d793 srli a5,a1,0x1f 102b4: 00b787b3 add a5,a5,a1 102b8: 0015f593 andi a1,a1,1 102bc: 4017d793 srai a5,a5,0x1 102c0: 00059463 bnez a1,102c8 <heapify+0x34> 102c4: fff78793 addi a5,a5,-1 102c8: 00279793 slli a5,a5,0x2 102cc: 00f50533 add a0,a0,a5 102d0: 00e52023 sw a4,0(a0) 102d4: 00008067 ret 102d8: 00259793 slli a5,a1,0x2 102dc: 00f507b3 add a5,a0,a5 102e0: 0007a683 lw a3,0(a5) 102e4: 0105d863 bge a1,a6,102f4 <heapify+0x60> 102e8: 0047a783 lw a5,4(a5) 102ec: 00f6d463 bge a3,a5,102f4 <heapify+0x60> 102f0: 00158593 addi a1,a1,1 102f4: 00259793 slli a5,a1,0x2 102f8: 00f507b3 add a5,a0,a5 102fc: 0007a683 lw a3,0(a5) 10300: 01f5d793 srli a5,a1,0x1f 10304: 00b787b3 add a5,a5,a1 10308: 4017d793 srai a5,a5,0x1 1030c: 0015f893 andi a7,a1,1 10310: fad750e3 bge a4,a3,102b0 <heapify+0x1c> 10314: 00089463 bnez a7,1031c <heapify+0x88> 10318: fff78793 addi a5,a5,-1 1031c: 00279793 slli a5,a5,0x2 10320: 00f507b3 add a5,a0,a5 10324: 00159593 slli a1,a1,0x1 10328: 00d7a023 sw a3,0(a5) 1032c: 00158593 addi a1,a1,1 10330: f7dff06f j 102ac <heapify+0x18> 00010334 <heapSort>: 10334: ff010113 addi sp,sp,-16 10338: 00912223 sw s1,4(sp) 1033c: 01f5d493 srli s1,a1,0x1f 10340: 00b484b3 add s1,s1,a1 10344: 00812423 sw s0,8(sp) 10348: 01212023 sw s2,0(sp) 1034c: 00112623 sw ra,12(sp) 10350: 00050913 mv s2,a0 10354: 00058413 mv s0,a1 10358: 4014d493 srai s1,s1,0x1 1035c: fff48493 addi s1,s1,-1 10360: 0204d863 bgez s1,10390 <heapSort+0x5c> 10364: fff40493 addi s1,s0,-1 10368: 00241413 slli s0,s0,0x2 1036c: 00890433 add s0,s2,s0 10370: ffc40413 addi s0,s0,-4 10374: 0204d863 bgez s1,103a4 <heapSort+0x70> 10378: 00c12083 lw ra,12(sp) 1037c: 00812403 lw s0,8(sp) 10380: 00412483 lw s1,4(sp) 10384: 00012903 lw s2,0(sp) 10388: 01010113 addi sp,sp,16 1038c: 00008067 ret 10390: 00040613 mv a2,s0 10394: 00048593 mv a1,s1 10398: 00090513 mv a0,s2 1039c: ef9ff0ef jal ra,10294 <heapify> 103a0: fbdff06f j 1035c <heapSort+0x28> 103a4: 00042703 lw a4,0(s0) 103a8: 00092783 lw a5,0(s2) 103ac: 00048613 mv a2,s1 103b0: 00e92023 sw a4,0(s2) 103b4: 00f42023 sw a5,0(s0) 103b8: 00000593 li a1,0 103bc: 00090513 mv a0,s2 103c0: ed5ff0ef jal ra,10294 <heapify> 103c4: fff48493 addi s1,s1,-1 103c8: fa9ff06f j 10370 <heapSort+0x3c> 000103cc <containsDuplicate>: 103cc: ff010113 addi sp,sp,-16 103d0: 00812423 sw s0,8(sp) 103d4: 00912223 sw s1,4(sp) 103d8: 00050413 mv s0,a0 103dc: 00058493 mv s1,a1 103e0: 00112623 sw ra,12(sp) 103e4: f51ff0ef jal ra,10334 <heapSort> 103e8: 00040513 mv a0,s0 103ec: 00000793 li a5,0 103f0: fff48493 addi s1,s1,-1 103f4: 0097ce63 blt a5,s1,10410 <containsDuplicate+0x44> 103f8: 00000513 li a0,0 103fc: 00c12083 lw ra,12(sp) 10400: 00812403 lw s0,8(sp) 10404: 00412483 lw s1,4(sp) 10408: 01010113 addi sp,sp,16 1040c: 00008067 ret 10410: 00052683 lw a3,0(a0) 10414: 00450513 addi a0,a0,4 10418: 00052703 lw a4,0(a0) 1041c: 00e68663 beq a3,a4,10428 <containsDuplicate+0x5c> 10420: 00178793 addi a5,a5,1 10424: fd1ff06f j 103f4 <containsDuplicate+0x28> 10428: 00100513 li a0,1 1042c: fd1ff06f j 103fc <containsDuplicate+0x30> ``` #### Size of Assembly code: ![](https://i.imgur.com/ZKmfq9O.png) #### Obsevation: - Register Used: 9 - sx register: s0 s1 s2 - tx register: None - ax register: a0 a1 a2 a3 a4 a5 - other: sp ra - Stack used(bytes): 96 ## Results of rv32emu | Subject | Original | -O0 | -O1 | -O3 | -Os | |:----------------:|:--------:|:-----:|:-----:|:-----:| ----- | | Size(Text) | None | 76204 | 75792 | 75996 | 75616 | | Used Registers | 16 | 10 | 10 | 12 | 9 | | CSR cycle count | None | 6482 |4862 | 4714 | 4984 | ### Conclusion: * From `Original`: Number of used register is higher (about 1.5 times). * From `O0` to `O1`: Significantly decreased code size and cycle count. * From `O1` to `O3`: More ambitious on stack and register scheduling. Slightly decreased cycle count. * `Os`Comparing with `O1` and `O3`: the assembly code has smaller size, fewer register was used. But required more cycle to finish.