Implementation

Topic : Convert FP32 to BF16 and Count the Number of Ones in the Binary Representation

test data

Single-precision (FP32)	bfloat16 as HEX literals	bfloat16 as binary literals
1.200000	0x3f99999a	00111111100110011001100110011010
1.203125	0x3f9a0000	00111111100110100000000000000000
2.312500	0x40140000	01000000000101000000000000000000

Question : Why, after the code *p &= 0xFFFF0000, aren't the last 16 bits of y supposed to be 0?

c code

#include <stdio.h>
#include <stdlib.h>
/*
 * Round x to bfloat16 precision (the upper 16 bits of the FP32 pattern)
 * and return how many bits are set to 1 in that pattern.
 *
 * x       : single-precision input value
 * returns : the bit count, as a float (the return type is kept for callers)
 *
 * Zero, infinity and NaN are not rounded, only truncated, but they are still
 * counted, so the function returns a count for every input. Note that a NaN
 * whose payload lives only in the low 16 bits truncates to the infinity
 * pattern.
 *
 * Assumes unsigned int is 32 bits wide, as the original int-based code did.
 */
float fp32_to_bf16(float x)
{
    /* A union gives access to the raw bits without an aliasing int* cast. */
    union { float f; unsigned int u; } y, r;

    y.f = x;
    unsigned int exp = y.u & 0x7F800000u;
    unsigned int man = y.u & 0x007FFFFFu;

    int is_zero = (exp == 0 && man == 0);
    int is_inf_or_nan = (exp == 0x7F800000u);

    if (!is_zero && !is_inf_or_nan) {
        /* Round to nearest: add half of a bfloat16 ULP before truncating. */
        r.f = x;
        r.u &= 0xFF800000u;  /* r has the same sign and exponent as x */
        r.f /= 0x100;        /* r = 2^(exp - 8) */
        y.f = x + r.f;
    }

    y.u &= 0xFFFF0000u;      /* keep sign, exponent and top 7 mantissa bits */

    /*
     * Count set bits on the integer pattern, not on the float value:
     * -0.0f compares equal to zero although its sign bit is set.
     */
    int count = 0;
    for (unsigned int bits = y.u; bits != 0; bits &= bits - 1)
        count++;

    return (float)count;
}
/*
 * Driver: passes each sample value to fp32_to_bf16() and prints the
 * value together with the number that call returned.
 */
int main()
{
    const float samples[] = { 1.200000, 1.203125, 2.312500 };
    const int total = (int)(sizeof samples / sizeof samples[0]);

    for (int i = 0; i < total; ++i) {
        /* The float result is converted to int for the %d conversion. */
        int ones = (float)fp32_to_bf16(samples[i]);
        printf("The number %lf has %d bits set to 1.\n", samples[i], ones);
    }

    system("pause");
    return 0;
}

Assembly code

First version

Attempting to convert Problem B from C code to RISC-V.

# RISC-V (RV32I) assembly, written for the Ripes simulator.
.data
    test0: .word 1067030938                         # 0x3f99999a = 1.200000
    test1: .word 1067057152                         # 0x3f9a0000 = 1.203125
    test2: .word 1075052544                         # 0x40140000 = 2.312500
.text
.globl main
#-----------------------------------------------------------------------
# main: round the FP32 bit pattern stored at test0 to bfloat16 precision
#       (upper 16 bits) and print the result as a float.
# Regs: a0 = x, then the result
#       t0 = exponent-field mask
#       t1 = exponent field of x
#       t2 = scratch constant
#-----------------------------------------------------------------------
main:
    lw   a0, test0                                  # a0 = x (raw FP32 bits)

    li   t0, 0x7F800000                             # exponent-field mask
    and  t1, a0, t0                                 # t1 = exponent field of x
    beq  t1, t0, end                                # exponent all ones: infinity or NaN, keep x as is
    beq  t1, zero, truncate                         # exponent zero: r would be 0, nothing to add

    # r = 2^(exp-8) is half of a bfloat16 ULP; with the exponent of x that is
    # bit 15 of the word. Adding it to the whole word keeps the sign bit and
    # lets a mantissa carry ripple into the exponent.
    li   t2, 0x8000
    add  a0, a0, t2                                 # y = x + r

truncate:
    li   t2, 0xFFFF0000
    and  a0, a0, t2                                 # keep sign, exponent, top 7 mantissa bits

end:
    li   a7, 2                                      # print a0 as a float
    ecall
    li   a7, 10                                     # exit instead of running past the last instruction
    ecall

Second version

Modify the original question to include how many '1's are there in the binary representation of BF16.

# RISC-V (RV32I) assembly, written for the Ripes simulator.
.data
    test0: .word 1067030938                         # 0x3f99999a, rounds to 0x3f9a0000
    test1: .word 1067057152                         # 0x3f9a0000, rounds to 0x3f9a0000
    test2: .word 1075052544                         # 0x40140000, rounds to 0x40140000
    str: .string "How many '1's are there in BF16 (binary)? ANS : "
.text
.globl main
#-----------------------------------------------------------------------
# main: round the FP32 bit pattern stored at test0 to bfloat16 precision
#       (upper 16 bits), count the 1 bits of that pattern and print the
#       count after the prompt string.
# Regs: a0 = x, later the ecall argument
#       a1 = count
#       t0 = bfloat16 pattern (consumed by count_ones)
#       t1 = mask / scratch
#       t2 = exponent field of x
#       t3 = rounding constant
#-----------------------------------------------------------------------
main:
    lw   a0, test0                                  # a0 = x (raw FP32 bits)

    li   t1, 0x7F800000                             # exponent-field mask
    and  t2, a0, t1                                 # t2 = exponent field of x
    beq  t2, t1, truncate                           # infinity or NaN: no rounding, still counted
    beq  t2, zero, truncate                         # exponent zero: r would be 0, nothing to add

    # r = 2^(exp-8) is half of a bfloat16 ULP; with the exponent of x that is
    # bit 15 of the word. Adding it to the whole word keeps the sign bit and
    # lets a mantissa carry ripple into the exponent.
    li   t3, 0x8000
    add  a0, a0, t3                                 # y = x + r

truncate:
    li   t1, 0xFFFF0000
    and  t0, a0, t1                                 # t0 = sign, exponent, top 7 mantissa bits

    mv   a1, x0                                     # count = 0
    jal  ra, count_ones

    la   a0, str
    li   a7, 4                                      # print string
    ecall

    mv   a0, a1
    li   a7, 1                                      # print integer
    ecall

    li   a7, 10                                     # exit so execution never falls into count_ones
    ecall

#-----------------------------------------------------------------------
# count_ones: add the number of 1 bits in t0 to a1.
# In:    t0 = word to count, a1 = running count
# Out:   a1 = count, t0 = 0
# Clobb: t0, t1
#-----------------------------------------------------------------------
count_ones:
    beq  t0, x0, count_done                         # all-zero pattern has no bits to count
count_loop:
    addi t1, t0, -1
    and  t0, t0, t1                                 # clear the lowest set bit
    addi a1, a1, 1                                  # count++
    bne  t0, x0, count_loop
count_done:
    ret

Result

Image Not Showing Possible Reasons

The image was uploaded to a note which you don't have access to
The note which the image was originally uploaded to has been deleted

Learn More →

Test 0

c : Count to ten '1' in BF16.
Assembly : Count to ten '1' in BF16.

Test 1

c : Count to ten '1' in BF16.
Assembly : Count to ten '1' in BF16.

Test 2

c : Count to one '1' in BF16.
Assembly : Count to one '1' in BF16.

Analysis

Ripes Simulator

stage	description
IF	Instruction Fetch
ID	Instruction Decode & Register Read
EX	Execution or Address Calculation
MEM	Data Memory Access
WB	Write Back

example:mv t0, a0

IF

PC=PC+4

I-Format instruction addi

imm[11:0]	rs1	funct3	rd	opcode
000000000000	01010	000	00101	0010011

After converting it to Hex, you can get 0x00050293

ID

The instruction has been decoded, input R1_idx=0x0a R2_idx=0x00, output Reg1=0x10000000 Reg2=0x00000000

EX

In this stage, four multiplexers (MUX) choose which inputs are used, giving Op1=0x40140000 and Op2=0x00000000. At the red circle, because x10 was just updated by the preceding load word (lw) instruction, its value is forwarded directly from the write-back (WB) stage to the multiplexer.

MEM

We can infer that the Addi instruction does not read or write to memory but instead directly enters the write-back (WB) stage.

WB

In the write-back (WB) stage, the new value is written into register x5.

Hazards

Structural Hazard : Two or more instructions in the pipeline compete for access to a single physical resource
Data Hazard

solution : 1.stalling : Adding the NOP instruction causes the affected instruction to do nothing. 2.Forwarding : grab operand from pipeline stage,rather than register file

Control Hazard

Reducing Branch Penalties : To improve performance, use “branch prediction” to guess which way branch will go earlier in pipeline

Find my hazard

In my assembly code, jal ra, count_ones is a jump (an unconditional branch), so a control hazard exists.

When jumping to the label <count_ones>, the IF and ID stages will be flushed and converted to NOP.

Improve

To resolve the control hazard, unnecessary jumps are removed.

Third version

# RISC-V (RV32I) assembly, written for the Ripes simulator.
.data
    test0: .word 1067030938                         # 0x3f99999a, rounds to 0x3f9a0000
    test1: .word 1067057152                         # 0x3f9a0000, rounds to 0x3f9a0000
    test2: .word 1075052544                         # 0x40140000, rounds to 0x40140000
    str: .string "How many '1's are there in BF16 (binary)? ANS : "
.text
.globl main
#-----------------------------------------------------------------------
# main: round the FP32 bit pattern stored at test0 to bfloat16 precision
#       (upper 16 bits), count the 1 bits of that pattern with an inline
#       loop (no jal/ret) and print the count after the prompt string.
# Regs: a0 = x, later the ecall argument
#       a1 = count
#       t0 = bfloat16 pattern (consumed by the loop)
#       t1 = mask / scratch
#       t2 = exponent field of x
#       t3 = rounding constant
#-----------------------------------------------------------------------
main:
    lw   a0, test0                                  # a0 = x (raw FP32 bits)

    li   t1, 0x7F800000                             # exponent-field mask
    and  t2, a0, t1                                 # t2 = exponent field of x
    beq  t2, t1, truncate                           # infinity or NaN: no rounding, still counted
    beq  t2, zero, truncate                         # exponent zero: r would be 0, nothing to add

    # r = 2^(exp-8) is half of a bfloat16 ULP; with the exponent of x that is
    # bit 15 of the word. Adding it to the whole word keeps the sign bit and
    # lets a mantissa carry ripple into the exponent.
    li   t3, 0x8000
    add  a0, a0, t3                                 # y = x + r

truncate:
    li   t1, 0xFFFF0000
    and  t0, a0, t1                                 # t0 = sign, exponent, top 7 mantissa bits

    mv   a1, x0                                     # count = 0
    beq  t0, x0, print                              # all-zero pattern has no bits to count
count_ones:
    addi t1, t0, -1
    and  t0, t0, t1                                 # clear the lowest set bit
    addi a1, a1, 1                                  # count++
    bne  t0, x0, count_ones

print:
    la   a0, str
    li   a7, 4                                      # print string
    ecall
    mv   a0, a1

end:
    li   a7, 1                                      # print integer
    ecall
    li   a7, 10                                     # exit instead of running past the last instruction
    ecall

Before

After

Implementation

test data

c code

Assembly code

First version

Second version

Result

Analysis

Ripes Simulator

IF

ID

EX

MEM

WB

Hazards

Find my hazard

Improve

Third version

Reference

Read more

伺服語錄

撰寫 LKM

印表機驅動安裝

2024q1 Homework2 (quiz1+2)