Try   HackMD

Output different number with same binary format by using counting leading zeros

contributed by david965154

In some numerical dataset, the head of number will fill with the 0's to make them look neater.
For example:

NO. PRICE VALUE
01 020.0 010
02 035.0 110
03 120.0 650
. 068.0 566
. 600.0 003
30 251.0 005

In this sheet, the column of NO. are all fill with two digits. And in column of PRICE and VALUE are also have the same digits.


Implementation

Source Code
Attention : Here is the 32bit ver.

C code

#include <stdint.h>
#include <stdio.h>
uint16_t count_leading_zeros(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);

    /* count ones (population count) */
    x -= ((x >> 1) & 0x55555555 );
    x = ((x >> 2) & 0x33333333) + (x & 0x33333333 );
    x = ((x >> 4) + x) & 0x0f0f0f0f;
    x += (x >> 8);
    x += (x >> 16);

    return (32 - (x & 0x7f));
}

int main(){
    uint32_t x,y;
    scanf("%u %u",&x, &y);
    x = count_leading_zeros(x);
    y = count_leading_zeros(y);
    printf("%u",(x>y)?(x-y):(y-x));

    return 0;
}

Assembly code

The original code is counting leading zeros for 64 bits unsign integer.If you wonder how to reach the goal of implement that with rv32i, you can try to separated x to x/(2^32) and x%(x^32). Last, add those two result, you can get the answer.

.data
    #test case 1
    x1: .word 16
    y1: .word 12
    #test case 2
    x2: .word 25
    y2: .word 800
    #test case 3
    x3: .word 956
    y3: .word 85 
    string1: .string  "\nshould keep zeros:"

.text
.globl main
main:
    nop
    addi t0,t0,1
    addi sp, sp, -4
    sw t0,0(sp)
    addi t5, zero, 3
    
    la a0, string1
    li a7, 4
    ecall
    
    addi t0,t0,-2
    blt t0, zero, firstx
    beq t0, zero, secondx
    bge t0, zero, thirdx
    
firstx:
    #read x
    lw a0, x1         
    #call func                  
    jal ra, func
    #store in s1
    sw a0, 0(s1)
    
firsty: 
    #read y
    lw a0, y1    
    #call func              
    jal ra, func
    jal ra, calcu
    
secondx:
    #read x
    lw a0, x2         
    #call func                  
    jal ra, func
    #store in s1
    sw a0, 0(s1)
    
secondy: 
    #read y
    lw a0, y2    
    #call func              
    jal ra, func
    jal ra, calcu
    
thirdx:
    #read x
    lw a0, x3         
    #call func                  
    jal ra, func
    #store in s1
    sw a0, 0(s1)
    
thirdy: 
    #read y
    lw a0, y3    
    #call func              
    jal ra, func
    jal ra, calcu
    
calcu:
    #load s1 to t0
    lw t0, 0(s1)
    blt a0, t0, no
    
    #a0>t0
    sub a0, a0, t0
    li a7, 1
    ecall
    jal ra, restore
    
    #t0>a0
no:
    sub a0, t0, a0
    li a7, 1
    ecall
    jal ra, restore

restore:
    lw t0,0(sp)
    addi sp, sp, 4
    beq t0, t5, exit
    jal ra, main
    
exit:
    li a7, 10
    ecall
    
    #t1 = shift num
func:
    addi  t1, t1, 1 
loop:
    sra   t2, a0, t1     
    or    a0, a0, t2    
    #if t1 == 16 co
    addi  t3, t1, -16     
    beq   t3, x0, co   
    #t1 = t1*2
    slli  t1, t1, 1      
    jal   x0, loop
co:
    #x -= ((x >> 1) & 0x55555555 );
    srai  t1, a0, 1
    lui t4, 0x55555
    ori t4, t4, 0x555
    and  t1, t1, t4
    sub   a0, a0, t1
    #x = ((x >> 2) & 0x33333333) + (x & 0x33333333 );
    srai  t1, a0, 2
    lui t4, 0x33333
    ori t4, t4, 0x333
    and  t1, t1, t4
    and  a0, a0, t4
    add   a0, a0, t1
    # x = ((x >> 4) + x) & 0x0f0f0f0f;
    srai  t1, a0, 4
    add   a0, a0, t1
    lui t4, 0x0f0f0
    ori t4, t4, 0x787
    addi t4, t4, 0x788
    and  a0, a0, t4
    #x += (x >> 8);
    srai  t1, a0, 8
    add   a0, a0, t1
    #x += (x >> 16);
    srai  t1, a0, 16
    add   a0, a0, t1
    #return (32 - (x & 0x7f));
    andi  a0, a0, 0x7f
    addi  t3, t3, 32
    sub   a0, t3, a0

    ret

Let us focus on the nop in the title, it was the trap that I spent a lot of time to debug. After I operatedlw t0,0(sp) (At the last, I will talk about instruction lw), the wb will happen in last step. After 1.addi sp, sp, 4 2.beq t0, t5, exit 3.jal ra, main, I need to operate addi t0,t0,1, but the t0 still not be update. In addition, the lw t0,0(sp) update the t0 after addi t0,t0,1, it also cause the infinite loop.


Analysis

Pseudo instruction

by Ripes


00000000 <main>:
    0:        00000013        addi x0 x0 0
    4:        00000013        addi x0 x0 0
    8:        00000013        addi x0 x0 0
    c:        00128293        addi x5 x5 1
    10:        ffc10113        addi x2 x2 -4
    14:        00512023        sw x5 0 x2
    18:        00300f13        addi x30 x0 3
    1c:        10000517        auipc x10 0x10000
    20:        ffc50513        addi x10 x10 -4
    24:        00400893        addi x17 x0 4
    28:        00000073        ecall
    2c:        ffe28293        addi x5 x5 -2
    30:        0002c663        blt x5 x0 12 <firstx>
    34:        02028463        beq x5 x0 40 <secondx>
    38:        0402d263        bge x5 x0 68 <thirdx>

0000003c <firstx>:
    3c:        10000517        auipc x10 0x10000
    40:        fc452503        lw x10 -60 x10
    44:        098000ef        jal x1 152 <func>
    48:        00a4a023        sw x10 0 x9

0000004c <firsty>:
    4c:        10000517        auipc x10 0x10000
    50:        fb852503        lw x10 -72 x10
    54:        088000ef        jal x1 136 <func>
    58:        044000ef        jal x1 68 <calcu>

0000005c <secondx>:
    5c:        10000517        auipc x10 0x10000
    60:        fac52503        lw x10 -84 x10
    64:        078000ef        jal x1 120 <func>
    68:        00a4a023        sw x10 0 x9

0000006c <secondy>:
    6c:        10000517        auipc x10 0x10000
    70:        fa052503        lw x10 -96 x10
    74:        068000ef        jal x1 104 <func>
    78:        024000ef        jal x1 36 <calcu>

0000007c <thirdx>:
    7c:        10000517        auipc x10 0x10000
    80:        f9452503        lw x10 -108 x10
    84:        058000ef        jal x1 88 <func>
    88:        00a4a023        sw x10 0 x9

0000008c <thirdy>:
    8c:        10000517        auipc x10 0x10000
    90:        f8852503        lw x10 -120 x10
    94:        048000ef        jal x1 72 <func>
    98:        004000ef        jal x1 4 <calcu>

0000009c <calcu>:
    9c:        0004a283        lw x5 0 x9
    a0:        00554a63        blt x10 x5 20 <no>
    a4:        40550533        sub x10 x10 x5
    a8:        00100893        addi x17 x0 1
    ac:        00000073        ecall
    b0:        014000ef        jal x1 20 <restore>

000000b4 <no>:
    b4:        40a28533        sub x10 x5 x10
    b8:        00100893        addi x17 x0 1
    bc:        00000073        ecall
    c0:        004000ef        jal x1 4 <restore>

000000c4 <restore>:
    c4:        00012283        lw x5 0 x2
    c8:        00410113        addi x2 x2 4
    cc:        01e28463        beq x5 x30 8 <exit>
    d0:        f31ff0ef        jal x1 -208 <main>

000000d4 <exit>:
    d4:        00a00893        addi x17 x0 10
    d8:        00000073        ecall

000000dc <func>:
    dc:        00130313        addi x6 x6 1

000000e0 <loop>:
    e0:        406553b3        sra x7 x10 x6
    e4:        00756533        or x10 x10 x7
    e8:        ff030e13        addi x28 x6 -16
    ec:        000e0663        beq x28 x0 12 <co>
    f0:        00131313        slli x6 x6 1
    f4:        fedff06f        jal x0 -20 <loop>

000000f8 <co>:
    f8:        40155313        srai x6 x10 1
    fc:        55555eb7        lui x29 0x55555
    100:        555eee93        ori x29 x29 1365
    104:        01d37333        and x6 x6 x29
    108:        40650533        sub x10 x10 x6
    10c:        40255313        srai x6 x10 2
    110:        33333eb7        lui x29 0x33333
    114:        333eee93        ori x29 x29 819
    118:        01d37333        and x6 x6 x29
    11c:        01d57533        and x10 x10 x29
    120:        00650533        add x10 x10 x6
    124:        40455313        srai x6 x10 4
    128:        00650533        add x10 x10 x6
    12c:        0f0f0eb7        lui x29 0xf0f0
    130:        787eee93        ori x29 x29 1927
    134:        788e8e93        addi x29 x29 1928
    138:        01d57533        and x10 x10 x29
    13c:        40855313        srai x6 x10 8
    140:        00650533        add x10 x10 x6
    144:        41055313        srai x6 x10 16
    148:        00650533        add x10 x10 x6
    14c:        07f57513        andi x10 x10 127
    150:        020e0e13        addi x28 x28 32
    154:        40ae0533        sub x10 x28 x10
    158:        00008067        jalr x0 x1 0

5-stage pipelined processor

Choose the 5 stage processor.

5-stage:

  1. Instruction fetch (IF)
  2. Instruction decode and register fetch (ID)
  3. Execute (EX)
  4. Memory access (MEM)
  5. Register write back (WB)

In the 5 stage processor, you can see 4 bars which have some word on them. The first bar was IF/ID it means that part IF is on its left hand side, and part ID is on its right hand side. So are others,they just need to be adjust with the word on its bar.

Take the instruction lw x10 -60 x10for example:
Sub instruction format is lw rd, simm12(rs1)

1.Instruction fetch (IF)

  • The address of instruction lw x10 -60 x10 is 0x00000040. So it input 0x00000040 into the Instruction memory.
  • The corresponding instruction is 0xfc452503 , also is lw x10 -60 x10 will get into the next stage.
  • Because there is no jump or branch instruction.The PC will add by 4.

2.Instruction decode and register fetch (ID)

  • After decode the instruction 0xfc452503, the registers get the register1 index 0x0a, so that get the number 0x10000018
  • With the opcode LW and instruction0xfc452503, we can get 0xffffffc4 also is -60.

3.Execute (EX)

  • The ALU add x10 and -60 to get the memory address.
  • Note that:
  • Because of the last instruction auipc x10 0x10000 the x10 need to be update to 0x1000003c, this is why the ALU Op1 is not 0x10000018

4.Memory access (MEM)

  • The memory get the value store in data memory address 0x10000000, so we can get 0x00000010

5.Register write back (WB)

  • To write back the 0x00000010 to the registerx10, the data line transport 0x00000010 back to register(Below is the register block in same time)