Try   HackMD

Assignment2: RISC-V Toolchain

contributed by < Q36091334 >

tags: RISC-V, jserv

Still working on

Total Hamming Distance (Leetcode 477)

The Hamming distance between two integers is the number of positions at which the corresponding bits are different.

Given an integer array nums, return the sum of Hamming distances between all the pairs of the integers in nums.

Given integer will be 32bit

Solution C language

class Solution { public: int totalHammingDistance(vector<int>& nums) { int res = 0, n = nums.size(); for (int i = 0; i < 32; ++i) { int cnt = 0; for (int num : nums) { if (num & (1 << i)) ++cnt; } res += cnt * (n - cnt); } return res; } };
  1. Idea:
    Initial values, store some data to memory and jump to the L2
  2. L2
    for loop if i < 32 jump to L3.

produced by gcc with -O3

Execute the elf file

Complied by -O0(original)

test3:     file format elf32-littleriscv


Disassembly of section .text:

00010074 <totalHammingDistance>:
   10074:	fd010113          	addi	sp,sp,-48
   10078:	02112623          	sw	ra,44(sp)
   1007c:	02812423          	sw	s0,40(sp)
   10080:	03010413          	addi	s0,sp,48
   10084:	fca42e23          	sw	a0,-36(s0)
   10088:	00058793          	mv	a5,a1
   1008c:	fcf41d23          	sh	a5,-38(s0)
   10090:	fe041723          	sh	zero,-18(s0)
   10094:	fe041623          	sh	zero,-20(s0)
   10098:	fe041523          	sh	zero,-22(s0)
   1009c:	fe041623          	sh	zero,-20(s0)
   100a0:	0b00006f          	j	10150 <totalHammingDistance+0xdc>
   100a4:	fe042223          	sw	zero,-28(s0)
   100a8:	fe041523          	sh	zero,-22(s0)
   100ac:	0480006f          	j	100f4 <totalHammingDistance+0x80>
   100b0:	fea45783          	lhu	a5,-22(s0)
   100b4:	00179793          	slli	a5,a5,0x1
   100b8:	fdc42703          	lw	a4,-36(s0)
   100bc:	00f707b3          	add	a5,a4,a5
   100c0:	0007d783          	lhu	a5,0(a5)
   100c4:	fef41123          	sh	a5,-30(s0)
   100c8:	fe245703          	lhu	a4,-30(s0)
   100cc:	fec45783          	lhu	a5,-20(s0)
   100d0:	40f757b3          	sra	a5,a4,a5
   100d4:	0017f793          	andi	a5,a5,1
   100d8:	00078863          	beqz	a5,100e8 <totalHammingDistance+0x74>
   100dc:	fe442783          	lw	a5,-28(s0)
   100e0:	00178793          	addi	a5,a5,1
   100e4:	fef42223          	sw	a5,-28(s0)
   100e8:	fea45783          	lhu	a5,-22(s0)
   100ec:	00178793          	addi	a5,a5,1
   100f0:	fef41523          	sh	a5,-22(s0)
   100f4:	fea45703          	lhu	a4,-22(s0)
   100f8:	00200793          	li	a5,2
   100fc:	fae7fae3          	bgeu	a5,a4,100b0 <totalHammingDistance+0x3c>
   10100:	fda45703          	lhu	a4,-38(s0)
   10104:	fe442783          	lw	a5,-28(s0)
   10108:	40f707b3          	sub	a5,a4,a5
   1010c:	01079793          	slli	a5,a5,0x10
   10110:	0107d793          	srli	a5,a5,0x10
   10114:	fe442703          	lw	a4,-28(s0)
   10118:	01071713          	slli	a4,a4,0x10
   1011c:	01075713          	srli	a4,a4,0x10
   10120:	00070593          	mv	a1,a4
   10124:	00078513          	mv	a0,a5
   10128:	0f8000ef          	jal	ra,10220 <__mulsi3>
   1012c:	00050793          	mv	a5,a0
   10130:	01079713          	slli	a4,a5,0x10
   10134:	01075713          	srli	a4,a4,0x10
   10138:	fee45783          	lhu	a5,-18(s0)
   1013c:	00f707b3          	add	a5,a4,a5
   10140:	fef41723          	sh	a5,-18(s0)
   10144:	fec45783          	lhu	a5,-20(s0)
   10148:	00178793          	addi	a5,a5,1
   1014c:	fef41623          	sh	a5,-20(s0)
   10150:	fec45703          	lhu	a4,-20(s0)
   10154:	00f00793          	li	a5,15
   10158:	f4e7f6e3          	bgeu	a5,a4,100a4 <totalHammingDistance+0x30>
   1015c:	fee45783          	lhu	a5,-18(s0)
   10160:	00078513          	mv	a0,a5
   10164:	02c12083          	lw	ra,44(sp)
   10168:	02812403          	lw	s0,40(sp)
   1016c:	03010113          	addi	sp,sp,48
   10170:	00008067          	ret

00010174 <_start>:
   10174:	fd010113          	addi	sp,sp,-48
   10178:	02112623          	sw	ra,44(sp)
   1017c:	02812423          	sw	s0,40(sp)
   10180:	03010413          	addi	s0,sp,48
   10184:	000117b7          	lui	a5,0x11
   10188:	25078793          	addi	a5,a5,592 # 11250 <__DATA_BEGIN__>
   1018c:	0007a703          	lw	a4,0(a5)
   10190:	fce42e23          	sw	a4,-36(s0)
   10194:	0047a783          	lw	a5,4(a5)
   10198:	fef42023          	sw	a5,-32(s0)
   1019c:	400027b7          	lui	a5,0x40002
   101a0:	fef42423          	sw	a5,-24(s0)
   101a4:	000107b7          	lui	a5,0x10
   101a8:	23878793          	addi	a5,a5,568 # 10238 <__mulsi3+0x18>
   101ac:	fef42623          	sw	a5,-20(s0)
   101b0:	0200006f          	j	101d0 <_start+0x5c>
   101b4:	fec42783          	lw	a5,-20(s0)
   101b8:	0007c703          	lbu	a4,0(a5)
   101bc:	fe842783          	lw	a5,-24(s0)
   101c0:	00e78023          	sb	a4,0(a5)
   101c4:	fec42783          	lw	a5,-20(s0)
   101c8:	00178793          	addi	a5,a5,1
   101cc:	fef42623          	sw	a5,-20(s0)
   101d0:	fec42783          	lw	a5,-20(s0)
   101d4:	0007c783          	lbu	a5,0(a5)
   101d8:	fc079ee3          	bnez	a5,101b4 <_start+0x40>
   101dc:	fdc40793          	addi	a5,s0,-36
   101e0:	00400593          	li	a1,4
   101e4:	00078513          	mv	a0,a5
   101e8:	e8dff0ef          	jal	ra,10074 <totalHammingDistance>
   101ec:	fea42223          	sw	a0,-28(s0)
   101f0:	fe442783          	lw	a5,-28(s0)
   101f4:	0ff7f793          	zext.b	a5,a5
   101f8:	03078793          	addi	a5,a5,48
   101fc:	0ff7f713          	zext.b	a4,a5
   10200:	fe842783          	lw	a5,-24(s0)
   10204:	00e78023          	sb	a4,0(a5)
   10208:	00000793          	li	a5,0
   1020c:	00078513          	mv	a0,a5
   10210:	02c12083          	lw	ra,44(sp)
   10214:	02812403          	lw	s0,40(sp)
   10218:	03010113          	addi	sp,sp,48
   1021c:	00008067          	ret

00010220 <__mulsi3>:
   10220:	862a                	mv	a2,a0
   10222:	4501                	li	a0,0
   10224:	0015f693          	andi	a3,a1,1
   10228:	c291                	beqz	a3,1022c <__mulsi3+0xc>
   1022a:	9532                	add	a0,a0,a2
   1022c:	8185                	srli	a1,a1,0x1
   1022e:	0606                	slli	a2,a2,0x1
   10230:	f9f5                	bnez	a1,10224 <__mulsi3+0x4>
   10232:	8082                	ret

First I need to introduce how a instruction run in Ripes simulator.Choose a instrction from fetch,and we will see how it progress to the end.

Complied by -O3 (Optimized by speed)

test2:     file format elf32-littleriscv


Disassembly of section .text:

00010074 <totalHammingDistance>:
   10074:	00600513          	li	a0,6
   10078:	00008067          	ret

0001007c <_start>:
   1007c:	000107b7          	lui	a5,0x10
   10080:	0ac78793          	addi	a5,a5,172 # 100ac <_start+0x30>
   10084:	05400713          	li	a4,84
   10088:	400026b7          	lui	a3,0x40002
   1008c:	00e68023          	sb	a4,0(a3) # 40002000 <__global_pointer$+0x3fff0740>
   10090:	0017c703          	lbu	a4,1(a5)
   10094:	00178793          	addi	a5,a5,1
   10098:	fe071ae3          	bnez	a4,1008c <_start+0x10>
   1009c:	03600793          	li	a5,54
   100a0:	00f68023          	sb	a5,0(a3)
   100a4:	00000513          	li	a0,0
   100a8:	00008067          	ret

First I need to introduce how a instruction run in Ripes simulator.Choose a instrction from fetch,and we will see how it progress to the end.

Complied by -Os (Optimized by size)

test3:     file format elf32-littleriscv


Disassembly of section .text:

00010074 <totalHammingDistance>:
   10074:	fd010113          	addi	sp,sp,-48
   10078:	02112623          	sw	ra,44(sp)
   1007c:	02812423          	sw	s0,40(sp)
   10080:	03010413          	addi	s0,sp,48
   10084:	fca42e23          	sw	a0,-36(s0)
   10088:	00058793          	mv	a5,a1
   1008c:	fcf41d23          	sh	a5,-38(s0)
   10090:	fe041723          	sh	zero,-18(s0)
   10094:	fe041623          	sh	zero,-20(s0)
   10098:	fe041523          	sh	zero,-22(s0)
   1009c:	fe041623          	sh	zero,-20(s0)
   100a0:	0b00006f          	j	10150 <totalHammingDistance+0xdc>
   100a4:	fe042223          	sw	zero,-28(s0)
   100a8:	fe041523          	sh	zero,-22(s0)
   100ac:	0480006f          	j	100f4 <totalHammingDistance+0x80>
   100b0:	fea45783          	lhu	a5,-22(s0)
   100b4:	00179793          	slli	a5,a5,0x1
   100b8:	fdc42703          	lw	a4,-36(s0)
   100bc:	00f707b3          	add	a5,a4,a5
   100c0:	0007d783          	lhu	a5,0(a5)
   100c4:	fef41123          	sh	a5,-30(s0)
   100c8:	fe245703          	lhu	a4,-30(s0)
   100cc:	fec45783          	lhu	a5,-20(s0)
   100d0:	40f757b3          	sra	a5,a4,a5
   100d4:	0017f793          	andi	a5,a5,1
   100d8:	00078863          	beqz	a5,100e8 <totalHammingDistance+0x74>
   100dc:	fe442783          	lw	a5,-28(s0)
   100e0:	00178793          	addi	a5,a5,1
   100e4:	fef42223          	sw	a5,-28(s0)
   100e8:	fea45783          	lhu	a5,-22(s0)
   100ec:	00178793          	addi	a5,a5,1
   100f0:	fef41523          	sh	a5,-22(s0)
   100f4:	fea45703          	lhu	a4,-22(s0)
   100f8:	00200793          	li	a5,2
   100fc:	fae7fae3          	bgeu	a5,a4,100b0 <totalHammingDistance+0x3c>
   10100:	fda45703          	lhu	a4,-38(s0)
   10104:	fe442783          	lw	a5,-28(s0)
   10108:	40f707b3          	sub	a5,a4,a5
   1010c:	01079793          	slli	a5,a5,0x10
   10110:	0107d793          	srli	a5,a5,0x10
   10114:	fe442703          	lw	a4,-28(s0)
   10118:	01071713          	slli	a4,a4,0x10
   1011c:	01075713          	srli	a4,a4,0x10
   10120:	00070593          	mv	a1,a4
   10124:	00078513          	mv	a0,a5
   10128:	0f8000ef          	jal	ra,10220 <__mulsi3>
   1012c:	00050793          	mv	a5,a0
   10130:	01079713          	slli	a4,a5,0x10
   10134:	01075713          	srli	a4,a4,0x10
   10138:	fee45783          	lhu	a5,-18(s0)
   1013c:	00f707b3          	add	a5,a4,a5
   10140:	fef41723          	sh	a5,-18(s0)
   10144:	fec45783          	lhu	a5,-20(s0)
   10148:	00178793          	addi	a5,a5,1
   1014c:	fef41623          	sh	a5,-20(s0)
   10150:	fec45703          	lhu	a4,-20(s0)
   10154:	00f00793          	li	a5,15
   10158:	f4e7f6e3          	bgeu	a5,a4,100a4 <totalHammingDistance+0x30>
   1015c:	fee45783          	lhu	a5,-18(s0)
   10160:	00078513          	mv	a0,a5
   10164:	02c12083          	lw	ra,44(sp)
   10168:	02812403          	lw	s0,40(sp)
   1016c:	03010113          	addi	sp,sp,48
   10170:	00008067          	ret

00010174 <_start>:
   10174:	fd010113          	addi	sp,sp,-48
   10178:	02112623          	sw	ra,44(sp)
   1017c:	02812423          	sw	s0,40(sp)
   10180:	03010413          	addi	s0,sp,48
   10184:	000117b7          	lui	a5,0x11
   10188:	25078793          	addi	a5,a5,592 # 11250 <__DATA_BEGIN__>
   1018c:	0007a703          	lw	a4,0(a5)
   10190:	fce42e23          	sw	a4,-36(s0)
   10194:	0047a783          	lw	a5,4(a5)
   10198:	fef42023          	sw	a5,-32(s0)
   1019c:	400027b7          	lui	a5,0x40002
   101a0:	fef42423          	sw	a5,-24(s0)
   101a4:	000107b7          	lui	a5,0x10
   101a8:	23878793          	addi	a5,a5,568 # 10238 <__mulsi3+0x18>
   101ac:	fef42623          	sw	a5,-20(s0)
   101b0:	0200006f          	j	101d0 <_start+0x5c>
   101b4:	fec42783          	lw	a5,-20(s0)
   101b8:	0007c703          	lbu	a4,0(a5)
   101bc:	fe842783          	lw	a5,-24(s0)
   101c0:	00e78023          	sb	a4,0(a5)
   101c4:	fec42783          	lw	a5,-20(s0)
   101c8:	00178793          	addi	a5,a5,1
   101cc:	fef42623          	sw	a5,-20(s0)
   101d0:	fec42783          	lw	a5,-20(s0)
   101d4:	0007c783          	lbu	a5,0(a5)
   101d8:	fc079ee3          	bnez	a5,101b4 <_start+0x40>
   101dc:	fdc40793          	addi	a5,s0,-36
   101e0:	00400593          	li	a1,4
   101e4:	00078513          	mv	a0,a5
   101e8:	e8dff0ef          	jal	ra,10074 <totalHammingDistance>
   101ec:	fea42223          	sw	a0,-28(s0)
   101f0:	fe442783          	lw	a5,-28(s0)
   101f4:	0ff7f793          	zext.b	a5,a5
   101f8:	03078793          	addi	a5,a5,48
   101fc:	0ff7f713          	zext.b	a4,a5
   10200:	fe842783          	lw	a5,-24(s0)
   10204:	00e78023          	sb	a4,0(a5)
   10208:	00000793          	li	a5,0
   1020c:	00078513          	mv	a0,a5
   10210:	02c12083          	lw	ra,44(sp)
   10214:	02812403          	lw	s0,40(sp)
   10218:	03010113          	addi	sp,sp,48
   1021c:	00008067          	ret

00010220 <__mulsi3>:
   10220:	862a                	mv	a2,a0
   10222:	4501                	li	a0,0
   10224:	0015f693          	andi	a3,a1,1
   10228:	c291                	beqz	a3,1022c <__mulsi3+0xc>
   1022a:	9532                	add	a0,a0,a2
   1022c:	8185                	srli	a1,a1,0x1
   1022e:	0606                	slli	a2,a2,0x1
   10230:	f9f5                	bnez	a1,10224 <__mulsi3+0x4>
   10232:	8082                	ret

First I need to introduce how a instruction run in Ripes simulator.Choose a instrction from fetch,and we will see how it progress to the end.

Brief comparison

  • speed
    Dependency means when pipeline CPU encounter RAW issue,need to do some forwarding or the performance will be very low. Because this CPU is in order issue in order execution, we don't need to care about other dependency like WAR,WAW.

  • size
    In this case we can see there are two registers.srl will update x17 register,meanwhile andi need x17 source register.Because CPU has forwarding mechanism so andi x17 can read the latest data without inserting a bubble.

    o3 o0 os
    Type speed original size
    Excution time 85931ns 91061ns 80772ns
    Instruction count 188 414 213
    IPS 2187801 4546402 2637052
    Jumps Forward 1 5 36
    Jumps Backwards 38 42 41
    Branching 38(92.68%) 40(90.91%) 39(90.7%)
    Lines 78 78 39
    Text 312 344 188
    Data 4 4 0