contributed by david965154
In some numerical dataset, the head of number will fill with the 0's to make them look neater.
For example:
NO. | PRICE | VALUE |
---|---|---|
01 | 020.0 | 010 |
02 | 035.0 | 110 |
03 | 120.0 | 650 |
. | 068.0 | 566 |
. | 600.0 | 003 |
30 | 251.0 | 005 |
In this sheet, the column of NO. are all fill with two digits. And in column of PRICE and VALUE are also have the same digits.
Source Code
Attention : Here is the 32bit ver.
#include <stdint.h>
#include <stdio.h>
uint16_t count_leading_zeros(uint32_t x)
{
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
/* count ones (population count) */
x -= ((x >> 1) & 0x55555555 );
x = ((x >> 2) & 0x33333333) + (x & 0x33333333 );
x = ((x >> 4) + x) & 0x0f0f0f0f;
x += (x >> 8);
x += (x >> 16);
return (32 - (x & 0x7f));
}
int main(){
uint32_t x,y;
scanf("%u %u",&x, &y);
x = count_leading_zeros(x);
y = count_leading_zeros(y);
printf("%u",(x>y)?(x-y):(y-x));
return 0;
}
The original code is counting leading zeros for 64 bits unsign integer.If you wonder how to reach the goal of implement that with rv32i, you can try to separated x
to x/(2^32)
and x%(x^32)
. Last, add those two result, you can get the answer.
.data
#test case 1
x1: .word 16
y1: .word 12
#test case 2
x2: .word 25
y2: .word 800
#test case 3
x3: .word 956
y3: .word 85
string1: .string "\nshould keep zeros:"
.text
.globl main
main:
nop
addi t0,t0,1
addi sp, sp, -4
sw t0,0(sp)
addi t5, zero, 3
la a0, string1
li a7, 4
ecall
addi t0,t0,-2
blt t0, zero, firstx
beq t0, zero, secondx
bge t0, zero, thirdx
firstx:
#read x
lw a0, x1
#call func
jal ra, func
#store in s1
sw a0, 0(s1)
firsty:
#read y
lw a0, y1
#call func
jal ra, func
jal ra, calcu
secondx:
#read x
lw a0, x2
#call func
jal ra, func
#store in s1
sw a0, 0(s1)
secondy:
#read y
lw a0, y2
#call func
jal ra, func
jal ra, calcu
thirdx:
#read x
lw a0, x3
#call func
jal ra, func
#store in s1
sw a0, 0(s1)
thirdy:
#read y
lw a0, y3
#call func
jal ra, func
jal ra, calcu
calcu:
#load s1 to t0
lw t0, 0(s1)
blt a0, t0, no
#a0>t0
sub a0, a0, t0
li a7, 1
ecall
jal ra, restore
#t0>a0
no:
sub a0, t0, a0
li a7, 1
ecall
jal ra, restore
restore:
lw t0,0(sp)
addi sp, sp, 4
beq t0, t5, exit
jal ra, main
exit:
li a7, 10
ecall
#t1 = shift num
func:
addi t1, t1, 1
loop:
sra t2, a0, t1
or a0, a0, t2
#if t1 == 16 co
addi t3, t1, -16
beq t3, x0, co
#t1 = t1*2
slli t1, t1, 1
jal x0, loop
co:
#x -= ((x >> 1) & 0x55555555 );
srai t1, a0, 1
lui t4, 0x55555
ori t4, t4, 0x555
and t1, t1, t4
sub a0, a0, t1
#x = ((x >> 2) & 0x33333333) + (x & 0x33333333 );
srai t1, a0, 2
lui t4, 0x33333
ori t4, t4, 0x333
and t1, t1, t4
and a0, a0, t4
add a0, a0, t1
# x = ((x >> 4) + x) & 0x0f0f0f0f;
srai t1, a0, 4
add a0, a0, t1
lui t4, 0x0f0f0
ori t4, t4, 0x787
addi t4, t4, 0x788
and a0, a0, t4
#x += (x >> 8);
srai t1, a0, 8
add a0, a0, t1
#x += (x >> 16);
srai t1, a0, 16
add a0, a0, t1
#return (32 - (x & 0x7f));
andi a0, a0, 0x7f
addi t3, t3, 32
sub a0, t3, a0
ret
Let us focus on the nop
in the title, it was the trap that I spent a lot of time to debug. After I operatedlw t0,0(sp)
(At the last, I will talk about instruction lw
), the wb
will happen in last step. After 1.addi sp, sp, 4
2.beq t0, t5, exit
3.jal ra, main
, I need to operate addi t0,t0,1
, but the t0
still not be update. In addition, the lw t0,0(sp)
update the t0 after addi t0,t0,1
, it also cause the infinite loop.
by Ripes
00000000 <main>:
0: 00000013 addi x0 x0 0
4: 00000013 addi x0 x0 0
8: 00000013 addi x0 x0 0
c: 00128293 addi x5 x5 1
10: ffc10113 addi x2 x2 -4
14: 00512023 sw x5 0 x2
18: 00300f13 addi x30 x0 3
1c: 10000517 auipc x10 0x10000
20: ffc50513 addi x10 x10 -4
24: 00400893 addi x17 x0 4
28: 00000073 ecall
2c: ffe28293 addi x5 x5 -2
30: 0002c663 blt x5 x0 12 <firstx>
34: 02028463 beq x5 x0 40 <secondx>
38: 0402d263 bge x5 x0 68 <thirdx>
0000003c <firstx>:
3c: 10000517 auipc x10 0x10000
40: fc452503 lw x10 -60 x10
44: 098000ef jal x1 152 <func>
48: 00a4a023 sw x10 0 x9
0000004c <firsty>:
4c: 10000517 auipc x10 0x10000
50: fb852503 lw x10 -72 x10
54: 088000ef jal x1 136 <func>
58: 044000ef jal x1 68 <calcu>
0000005c <secondx>:
5c: 10000517 auipc x10 0x10000
60: fac52503 lw x10 -84 x10
64: 078000ef jal x1 120 <func>
68: 00a4a023 sw x10 0 x9
0000006c <secondy>:
6c: 10000517 auipc x10 0x10000
70: fa052503 lw x10 -96 x10
74: 068000ef jal x1 104 <func>
78: 024000ef jal x1 36 <calcu>
0000007c <thirdx>:
7c: 10000517 auipc x10 0x10000
80: f9452503 lw x10 -108 x10
84: 058000ef jal x1 88 <func>
88: 00a4a023 sw x10 0 x9
0000008c <thirdy>:
8c: 10000517 auipc x10 0x10000
90: f8852503 lw x10 -120 x10
94: 048000ef jal x1 72 <func>
98: 004000ef jal x1 4 <calcu>
0000009c <calcu>:
9c: 0004a283 lw x5 0 x9
a0: 00554a63 blt x10 x5 20 <no>
a4: 40550533 sub x10 x10 x5
a8: 00100893 addi x17 x0 1
ac: 00000073 ecall
b0: 014000ef jal x1 20 <restore>
000000b4 <no>:
b4: 40a28533 sub x10 x5 x10
b8: 00100893 addi x17 x0 1
bc: 00000073 ecall
c0: 004000ef jal x1 4 <restore>
000000c4 <restore>:
c4: 00012283 lw x5 0 x2
c8: 00410113 addi x2 x2 4
cc: 01e28463 beq x5 x30 8 <exit>
d0: f31ff0ef jal x1 -208 <main>
000000d4 <exit>:
d4: 00a00893 addi x17 x0 10
d8: 00000073 ecall
000000dc <func>:
dc: 00130313 addi x6 x6 1
000000e0 <loop>:
e0: 406553b3 sra x7 x10 x6
e4: 00756533 or x10 x10 x7
e8: ff030e13 addi x28 x6 -16
ec: 000e0663 beq x28 x0 12 <co>
f0: 00131313 slli x6 x6 1
f4: fedff06f jal x0 -20 <loop>
000000f8 <co>:
f8: 40155313 srai x6 x10 1
fc: 55555eb7 lui x29 0x55555
100: 555eee93 ori x29 x29 1365
104: 01d37333 and x6 x6 x29
108: 40650533 sub x10 x10 x6
10c: 40255313 srai x6 x10 2
110: 33333eb7 lui x29 0x33333
114: 333eee93 ori x29 x29 819
118: 01d37333 and x6 x6 x29
11c: 01d57533 and x10 x10 x29
120: 00650533 add x10 x10 x6
124: 40455313 srai x6 x10 4
128: 00650533 add x10 x10 x6
12c: 0f0f0eb7 lui x29 0xf0f0
130: 787eee93 ori x29 x29 1927
134: 788e8e93 addi x29 x29 1928
138: 01d57533 and x10 x10 x29
13c: 40855313 srai x6 x10 8
140: 00650533 add x10 x10 x6
144: 41055313 srai x6 x10 16
148: 00650533 add x10 x10 x6
14c: 07f57513 andi x10 x10 127
150: 020e0e13 addi x28 x28 32
154: 40ae0533 sub x10 x28 x10
158: 00008067 jalr x0 x1 0
Choose the 5 stage processor.
In the 5 stage processor, you can see 4 bars which have some word on them. The first bar was IF/ID
it means that part IF is on its left hand side, and part ID is on its right hand side. So are others,they just need to be adjust with the word on its bar.
Take the instruction lw x10 -60 x10
for example:
Sub instruction format is lw rd, simm12(rs1)
lw x10 -60 x10
is 0x00000040
. So it input 0x00000040
into the Instruction memory.0xfc452503
, also is lw x10 -60 x10
will get into the next stage.0xfc452503
, the registers get the register1 index 0x0a
, so that get the number 0x10000018
LW
and instruction0xfc452503
, we can get 0xffffffc4
also is -60.x10
and -60
to get the memory address.auipc x10 0x10000
the x10
need to be update to 0x1000003c
, this is why the ALU Op1 is not 0x10000018
0x10000000
, so we can get 0x00000010
0x00000010
to the registerx10
, the data line transport 0x00000010
back to register(Below is the register block in same time)