contributed by <linyu425>
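If we want to obtain the value of the unknown function $f$ at the point $(x, y)$, and we know the values of $f$ at the four points $Q_{11} = (x_1, y_1)$, $Q_{12} = (x_1, y_2)$, $Q_{21} = (x_2, y_1)$ and $Q_{22} = (x_2, y_2)$, we can proceed as follows.
First, perform linear interpolation in the x-direction,
$$f(x, y_1) \approx \frac{x_2 - x}{x_2 - x_1} f(Q_{11}) + \frac{x - x_1}{x_2 - x_1} f(Q_{21}), \qquad f(x, y_2) \approx \frac{x_2 - x}{x_2 - x_1} f(Q_{12}) + \frac{x - x_1}{x_2 - x_1} f(Q_{22})$$
, and then perform linear interpolation in the y-direction, resulting in
$$f(x, y) \approx \frac{y_2 - y}{y_2 - y_1} f(x, y_1) + \frac{y - y_1}{y_2 - y_1} f(x, y_2)$$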
Learn More →
To achieve the aforementioned goals, it is necessary to utilize floating-point multiplication and addition.
I use a two-dimensional array (im_2) to store a 2x2 image and save the enlarged result in a 5x5 two-dimensional array (im_5).
In this version, we need to enlarge a 2x2 image to a 5x5 image, so we must fill in 21 values (25 output pixels minus the 4 known corners). Each value needs two fmul32 calls and one fadd32 call.
Learn More →
First, I calculate the values within the red box, and in this stage, each value is obtained through linear interpolation.
Learn More →
Then, we calculate the values in the blue box by bilinear interpolation with the values in the red box.
Learn More →
We use a 2*2 grayscale image as an enlargement example.
Learn More →
The following image is the 5x5 enlarged grayscale image obtained based on the above steps.
Learn More →
#include <stdio.h>
#include <stdint.h>
#define IN_N 2
#define OUT_N 5
/*
 * Return a mask covering the run of consecutive 1 bits at the
 * least-significant end of x (i.e. every bit below the lowest 0 bit).
 *
 * Examples: 0b1011 -> 0b0011, 0b0110 -> 0, 0xFFFFFFFF -> 0xFFFFFFFF.
 *
 * The run is found by repeated doubling: after the step with a given
 * width, a bit survives only if the `width` bits below it (or the bottom
 * of the word) were also set.
 */
uint32_t mask_lowest_zero(uint32_t x)
{
    uint32_t run = x;

    for (unsigned width = 1; width < 32; width <<= 1) {
        /* The lowest `width` bits have nothing below them to check. */
        uint32_t low_ones = ((uint32_t) 1 << width) - 1;
        run &= (run << width) | low_ones;
    }
    return run;
}
/*
 * Count the number of zero bits above the most significant set bit of x.
 * Returns 32 when x is 0.
 *
 * Step 1 copies the highest set bit into every lower position, so the
 * word becomes 000..0111..1. Step 2 counts those ones with a branch-free
 * population count; the leading-zero count is 32 minus that.
 */
uint32_t count_leading_zeros(uint32_t x)
{
    /* Step 1: smear the top set bit downwards. */
    for (unsigned dist = 1; dist < 32; dist <<= 1)
        x |= x >> dist;

    /* Step 2: population count, summing 2-, 4-, 8-, 16- and 32-bit fields. */
    uint32_t pairs = x - ((x >> 1) & 0x55555555);
    uint32_t nibbles = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333);
    uint32_t bytes = ((nibbles >> 4) + nibbles) & 0x0f0f0f0f;
    bytes += bytes >> 8;
    bytes += bytes >> 16;

    /* Only the low 7 bits hold the total (0..32). */
    return 32 - (bytes & 0x7f);
}
/*
 * Increment x by one using only bitwise operations.
 *
 * The run of trailing 1 bits is cleared and the lowest 0 bit is set,
 * which is exactly what adding one does. An all-ones input (-1) wraps
 * to 0.
 *
 * The bit manipulation is done on uint32_t: shifting the mask left as a
 * signed value overflows (undefined behaviour) when x is 0x7FFFFFFF.
 */
int32_t inc(int32_t x)
{
    uint32_t ux = (uint32_t) x;

    if (~ux == 0)
        return 0;

    /* mask: the trailing ones of x. */
    uint32_t mask = mask_lowest_zero(ux);
    /* z1: the single bit just above that run, i.e. the lowest zero of x. */
    uint32_t z1 = mask ^ ((mask << 1) | 1u);

    return (int32_t) ((ux & ~mask) | z1);
}
/*
 * Return bit number n of value (bit 0 is the least significant bit).
 * The result is always 0 or 1.
 */
static inline int32_t getbit(int32_t value, int n)
{
    const int32_t shifted = value >> n;

    return shifted & 1;
}
/*
 * Scaled shift-and-add multiply used for float mantissas.
 *
 * This is NOT a plain integer product. The accumulator is shifted right
 * once for every multiplier bit that remains after the current one, so
 * the result is approximately (a * b) >> k, where k is the index of the
 * highest set bit of b. Bits shifted out along the way are truncated.
 * For two 24-bit mantissas (bit 23 set) this yields the product scaled
 * back by 2^23, i.e. a value in [0x800000, 0x1FFFFFF].
 *
 * Returns 0 when b is 0.
 *
 * The multiplier is shifted as an unsigned value. With a signed
 * (arithmetic) shift a negative b would never reach 0 and the loop would
 * not terminate; the assembly version uses a logical shift as well.
 */
int32_t imul32(int32_t a, int32_t b)
{
    uint32_t multiplier = (uint32_t) b;
    int32_t r = 0;

    for (;;) {
        if ((multiplier & 1u) != 0)
            r = r + a;
        multiplier >>= 1;
        if (multiplier == 0)
            break;
        /* Make room for the next, more significant, partial product. */
        r = r >> 1;
    }
    return r;
}
/*
 * Software single-precision multiply: returns a * b using integer
 * operations only.
 *
 * The sign is the XOR of the operand signs, the exponent is the sum of
 * the biased exponents minus the bias (127), and the mantissa is the
 * scaled product of the two 24-bit mantissas (implicit leading 1
 * included). If the mantissa product reaches bit 24 it is shifted right
 * by one and the exponent is incremented.
 *
 * A zero operand returns a correctly signed zero. Without this check the
 * implicit leading 1 would be attached to a zero operand and, for
 * example, 0 * 0.5 would come out as +inf.
 *
 * Limitations: subnormals, infinities, NaN and exponent overflow or
 * underflow are not handled, and the mantissa is truncated rather than
 * rounded.
 */
float fmul32(float a, float b)
{
    /* A union is used for the bit reinterpretation; casting the pointer
     * to int32_t* violates the strict-aliasing rule. */
    union { float f; uint32_t u; } pa = { .f = a }, pb = { .f = b }, pr;
    uint32_t ia = pa.u, ib = pb.u;

    /* sign */
    uint32_t sr = (ia ^ ib) & 0x80000000u;

    /* zero times anything (finite) is a signed zero */
    if ((ia & 0x7FFFFFFFu) == 0 || (ib & 0x7FFFFFFFu) == 0) {
        pr.u = sr;
        return pr.f;
    }

    /* mantissa, with the implicit leading 1 restored */
    int32_t ma = (int32_t) ((ia & 0x7FFFFFu) | 0x800000u);
    int32_t mb = (int32_t) ((ib & 0x7FFFFFu) | 0x800000u);

    /* exponent (still biased) */
    int32_t ea = (int32_t) ((ia >> 23) & 0xFFu);
    int32_t eb = (int32_t) ((ib >> 23) & 0xFFu);

    /* 'r' = result */
    int32_t mrtmp = imul32(ma, mb);
    /* Bit 24 set means the mantissa product is >= 2.0: renormalize. */
    int mshift = getbit(mrtmp, 24);
    int32_t mr = mrtmp >> mshift;
    int32_t ertmp = ea + eb - 127;
    int32_t er = mshift ? inc(ertmp) : ertmp;

    pr.u = sr | (((uint32_t) er & 0xFFu) << 23) | ((uint32_t) mr & 0x7FFFFFu);
    return pr.f;
}
/*
 * Software single-precision add: returns a + b using integer operations
 * only.
 *
 * The operand with the larger magnitude is kept in `ia`, so the result
 * takes its sign and the mantissa difference can never be negative. The
 * smaller mantissa is shifted right to align the exponents, the
 * mantissas are added (same signs) or subtracted (different signs), and
 * the result is renormalized so that its leading 1 sits in bit 23.
 *
 * Zero operands and exact cancellation (a + (-a)) are handled
 * explicitly.
 *
 * Limitations: subnormals, infinities, NaN and exponent overflow or
 * underflow are not handled, and the result is truncated rather than
 * rounded.
 */
float fadd32(float a, float b)
{
    /* A union is used for the bit reinterpretation; casting the pointer
     * to int32_t* violates the strict-aliasing rule. */
    union { float f; uint32_t u; } pa = { .f = a }, pb = { .f = b }, pr;
    uint32_t ia = pa.u, ib = pb.u;

    /* Put the larger magnitude in ia. The parentheses matter: `<` binds
     * tighter than `&`. */
    if ((ia & 0x7fffffffu) < (ib & 0x7fffffffu)) {
        uint32_t temp = ia;
        ia = ib;
        ib = temp;
    }

    /* x + 0 = x. If both are zero, the result is -0 only when both are -0. */
    if ((ib & 0x7fffffffu) == 0) {
        pr.u = (ia & 0x7fffffffu) ? ia : (ia & ib);
        return pr.f;
    }

    /* sign */
    uint32_t sa = ia >> 31;
    uint32_t sb = ib >> 31;

    /* mantissa, with the implicit leading 1 restored */
    uint32_t ma = (ia & 0x7fffffu) | 0x800000u;
    uint32_t mb = (ib & 0x7fffffu) | 0x800000u;

    /* exponent (still biased) */
    int32_t ea = (int32_t) ((ia >> 23) & 0xffu);
    int32_t eb = (int32_t) ((ib >> 23) & 0xffu);

    /* Align the smaller operand; beyond 24 bits nothing of it is left. */
    int32_t align = (ea - eb > 24) ? 24 : (ea - eb);
    mb >>= align;

    /* Subtract only when the signs differ; equal signs (including two
     * negative operands) add magnitudes. */
    if (sa != sb)
        ma -= mb;
    else
        ma += mb;

    /* Exact cancellation: there is no leading 1 to normalize. */
    if (ma == 0) {
        pr.u = 0;
        return pr.f;
    }

    /* Renormalize so that the leading 1 is at bit 23 (8 leading zeros). */
    int32_t clz = (int32_t) count_leading_zeros(ma);
    int32_t shift = 0;
    if (clz <= 8) {
        shift = 8 - clz;
        ma >>= shift;
        ea += shift;
    } else {
        shift = clz - 8;
        ma <<= shift;
        ea -= shift;
    }

    /* The exponent is masked so an out-of-range value cannot spill into
     * the sign bit. */
    pr.u = (ia & 0x80000000u) | (((uint32_t) ea & 0xffu) << 23) | (ma & 0x7fffffu);
    return pr.f;
}
/*
 * Enlarge the IN_N x IN_N image im_2 to the OUT_N x OUT_N image im_5 by
 * bilinear interpolation, using only the software fmul32/fadd32
 * routines for the pixel arithmetic, then print the result row by row.
 *
 * The four input pixels become the four corners of the output. The top
 * and bottom rows are filled by linear interpolation between their
 * corners, and every remaining row is then interpolated column by column
 * between the top and bottom rows.
 */
int main(void)
{
    float im_2[IN_N][IN_N] = {{0.95478, 0.64721},
                              {0.823257, 0.22245}};
    float im_5[OUT_N][OUT_N] = {{0}};

    /* Corners are copied straight from the source image. */
    im_5[0][0] = im_2[0][0];
    im_5[0][OUT_N - 1] = im_2[0][IN_N - 1];
    im_5[OUT_N - 1][0] = im_2[IN_N - 1][0];
    im_5[OUT_N - 1][OUT_N - 1] = im_2[IN_N - 1][IN_N - 1];

    /* Top and bottom rows: interpolate along x between the corners. */
    for (int i = 1; i < OUT_N - 1; i++) {
        float w_first = (float) (OUT_N - 1 - i) / (float) (OUT_N - 1);
        float w_last = (float) (i) / (float) (OUT_N - 1);

        im_5[0][i] = fadd32(fmul32(im_5[0][0], w_first),
                            fmul32(im_5[0][OUT_N - 1], w_last));
        im_5[OUT_N - 1][i] = fadd32(fmul32(im_5[OUT_N - 1][0], w_first),
                                    fmul32(im_5[OUT_N - 1][OUT_N - 1], w_last));
    }

    /* Interior rows: interpolate along y between the top and bottom rows. */
    for (int i = 1; i < OUT_N - 1; i++) {
        float w_top = (float) (OUT_N - 1 - i) / (float) (OUT_N - 1);
        float w_bottom = (float) (i) / (float) (OUT_N - 1);

        for (int j = 0; j < OUT_N; j++) {
            im_5[i][j] = fadd32(fmul32(im_5[0][j], w_top),
                                fmul32(im_5[OUT_N - 1][j], w_bottom));
        }
    }

    for (int i = 0; i < OUT_N; i++) {
        for (int j = 0; j < OUT_N; j++) {
            printf("%f ", im_5[i][j]);
        }
        printf("\n");
    }
    return 0;
}
/*answer = 0.954780 0.877887 0.800995 0.724102 0.647210
0.921899 0.826679 0.731460 0.636240 0.541020
0.889019 0.775471 0.661924 0.548377 0.434830
0.856138 0.724263 0.592389 0.460514 0.328640
0.823257 0.673055 0.522853 0.372652 0.222450 */
Assembly code on github main_v2.s.
The following image is the result after executing with Ripes.
This is an unoptimized version; it needs 20108 cycles to finish.
Data cache hit rate is 99.46%.
Instr cache hit rate is 90.61%.
In order to reduce the cycle count and improve efficiency, note from the interpolation process above that each value requires two calls to fmul32. Therefore, if we can reduce the number of instructions executed inside fmul32, we can significantly reduce the cycle count.
To achieve this, I rewrite the part of the fmul32 function that uses the inc function.
int32_t er = mshift ? inc(ertmp) : ertmp;
The inc function is primarily used to increment the input number by one.
int32_t er;
if(mshift) er = ertmp + 1;
else er = ertmp;
After the modification, there is no need to execute the inc function and the mask_lowest_zero function every time when incrementing by one.
Furthermore, although getbit is declared static inline in the C code, the initial assembly version did not inline it; it was written as a separate function that has to be called.
# getbit (initial, non-inlined version)
# In:  a0 = value, a1 = bit index n
# Out: a0 = (value >> n) & 1
# s0, s1 and ra are saved on the stack and restored before returning,
# even though this routine makes no call of its own.
getbit:
#li a0 , 0x00000011
#li a1 , 1
# prologue: reserve 12 bytes and save s0, s1, ra
addi sp , sp , -12
sw s0 , 0(sp)
sw s1 , 4(sp)
sw ra , 8(sp)
# copy the arguments into the saved registers
mv s0 , a0
mv s1 , a1
# t0 = value >> n (arithmetic shift), then keep only bit 0 as the result
sra t0 , s0 , s1
andi a0 , t0 , 1
# epilogue: restore s0, s1, ra and release the stack space
lw s0 , 0(sp)
lw s1 , 4(sp)
lw ra , 8(sp)
addi sp , sp , 12
ret
We simply need to inline the body of the getbit function at its call sites, which removes the extra cycles spent on the function call.
# Call imul32 with s4 and s5 as arguments (presumably the two mantissas
# ma and mb of the C version) and keep the product in s8.
mv a0 , s4
mv a1 , s5
call imul32
mv s8 , a0
#inline getbit
# s9 = (s8 >> 24) & 1, i.e. bit 24 of the product, computed in place
# instead of calling getbit
li t0 , 24
sra t0 , s8 , t0
andi s9 , t0 , 1
The improved version is main_v2.s; it takes 18595 cycles.
Data cache hit rate is 99.36%.
Instr cache hit rate is 91.53%.
Generated from Ripes
0: 4300006f jal x0 1072 <main>
00000004 <count_leading_zeros>:
4: ff810113 addi x2 x2 -8
8: 00812023 sw x8 0 x2
c: 00112223 sw x1 4 x2
10: 00050413 addi x8 x10 0
14: 00145293 srli x5 x8 1
18: 00546433 or x8 x8 x5
1c: 00245293 srli x5 x8 2
20: 00546433 or x8 x8 x5
24: 00445293 srli x5 x8 4
28: 00546433 or x8 x8 x5
2c: 00845293 srli x5 x8 8
30: 00546433 or x8 x8 x5
34: 01045293 srli x5 x8 16
38: 00546433 or x8 x8 x5
3c: 00145293 srli x5 x8 1
40: 55555337 lui x6 0x55555
44: 55530313 addi x6 x6 1365
48: 0062f2b3 and x5 x5 x6
4c: 40540433 sub x8 x8 x5
50: 33333337 lui x6 0x33333
54: 33330313 addi x6 x6 819
58: 006473b3 and x7 x8 x6
5c: 00245293 srli x5 x8 2
60: 33333337 lui x6 0x33333
64: 33330313 addi x6 x6 819
68: 0062f2b3 and x5 x5 x6
6c: 00728433 add x8 x5 x7
70: 00445293 srli x5 x8 4
74: 008282b3 add x5 x5 x8
78: 0f0f1337 lui x6 0xf0f1
7c: f0f30313 addi x6 x6 -241
80: 0062f433 and x8 x5 x6
84: 00845293 srli x5 x8 8
88: 00540433 add x8 x8 x5
8c: 01045293 srli x5 x8 16
90: 00540433 add x8 x8 x5
94: 02000293 addi x5 x0 32
98: 07f47313 andi x6 x8 127
9c: 40628533 sub x10 x5 x6
a0: 00012403 lw x8 0 x2
a4: 00412083 lw x1 4 x2
a8: 00810113 addi x2 x2 8
ac: 00008067 jalr x0 x1 0
000000b0 <imul32>:
b0: ff010113 addi x2 x2 -16
b4: 00812023 sw x8 0 x2
b8: 00912223 sw x9 4 x2
bc: 01212423 sw x18 8 x2
c0: 00112623 sw x1 12 x2
c4: 00050413 addi x8 x10 0
c8: 00058493 addi x9 x11 0
cc: 00000913 addi x18 x0 0
000000d0 <while_imul32>:
d0: 0014f293 andi x5 x9 1
d4: 00028463 beq x5 x0 8 <exitif>
d8: 00890933 add x18 x18 x8
000000dc <exitif>:
dc: 0014d493 srli x9 x9 1
e0: 00048663 beq x9 x0 12 <done_imul32>
e4: 00195913 srli x18 x18 1
e8: fe9ff06f jal x0 -24 <while_imul32>
000000ec <done_imul32>:
ec: 00090513 addi x10 x18 0
f0: 00012403 lw x8 0 x2
f4: 00412483 lw x9 4 x2
f8: 00812903 lw x18 8 x2
fc: 00c12083 lw x1 12 x2
100: 01010113 addi x2 x2 16
104: 00008067 jalr x0 x1 0
00000108 <fmul32>:
108: fcc10113 addi x2 x2 -52
10c: 00812023 sw x8 0 x2
110: 00912223 sw x9 4 x2
114: 01212423 sw x18 8 x2
118: 01312623 sw x19 12 x2
11c: 01412823 sw x20 16 x2
120: 01512a23 sw x21 20 x2
124: 01612c23 sw x22 24 x2
128: 01712e23 sw x23 28 x2
12c: 03812023 sw x24 32 x2
130: 03912223 sw x25 36 x2
134: 03a12423 sw x26 40 x2
138: 03b12623 sw x27 44 x2
13c: 02112823 sw x1 48 x2
140: 00050413 addi x8 x10 0
144: 00058493 addi x9 x11 0
148: 01f00293 addi x5 x0 31
14c: 00545933 srl x18 x8 x5
150: 0054d9b3 srl x19 x9 x5
154: 008002b7 lui x5 0x800
158: fff28293 addi x5 x5 -1
15c: 005472b3 and x5 x8 x5
160: 00800337 lui x6 0x800
164: 0062ea33 or x20 x5 x6
168: 008002b7 lui x5 0x800
16c: fff28293 addi x5 x5 -1
170: 0054f2b3 and x5 x9 x5
174: 00800337 lui x6 0x800
178: 0062eab3 or x21 x5 x6
17c: 01700293 addi x5 x0 23
180: 005452b3 srl x5 x8 x5
184: 0ff00313 addi x6 x0 255
188: 0062fb33 and x22 x5 x6
18c: 01700293 addi x5 x0 23
190: 0054d2b3 srl x5 x9 x5
194: 0ff00313 addi x6 x0 255
198: 0062fbb3 and x23 x5 x6
19c: 000a0513 addi x10 x20 0
1a0: 000a8593 addi x11 x21 0
1a4: 00000097 auipc x1 0x0 <start>
1a8: f0c080e7 jalr x1 x1 -244
1ac: 00050c13 addi x24 x10 0
1b0: 01800293 addi x5 x0 24
1b4: 405c52b3 sra x5 x24 x5
1b8: 0012fc93 andi x25 x5 1
1bc: 019c5d33 srl x26 x24 x25
1c0: 017b02b3 add x5 x22 x23
1c4: 07f00313 addi x6 x0 127
1c8: 40628c33 sub x24 x5 x6
1cc: 01900663 beq x0 x25 12 <no_inc_ertmp>
1d0: 001c0d93 addi x27 x24 1
1d4: 0080006f jal x0 8 <fmul32_exitifelse>
000001d8 <no_inc_ertmp>:
1d8: 000c0d93 addi x27 x24 0
000001dc <fmul32_exitifelse>:
1dc: 01394cb3 xor x25 x18 x19
1e0: 01f00293 addi x5 x0 31
1e4: 005c92b3 sll x5 x25 x5
1e8: 0ff00313 addi x6 x0 255
1ec: 006df333 and x6 x27 x6
1f0: 01700393 addi x7 x0 23
1f4: 00731333 sll x6 x6 x7
1f8: 008003b7 lui x7 0x800
1fc: fff38393 addi x7 x7 -1
200: 007d73b3 and x7 x26 x7
204: 0062e2b3 or x5 x5 x6
208: 0072e533 or x10 x5 x7
20c: 00012403 lw x8 0 x2
210: 00412483 lw x9 4 x2
214: 00812903 lw x18 8 x2
218: 00c12983 lw x19 12 x2
21c: 01012a03 lw x20 16 x2
220: 01412a83 lw x21 20 x2
224: 01812b03 lw x22 24 x2
228: 01c12b83 lw x23 28 x2
22c: 02012c03 lw x24 32 x2
230: 02412c83 lw x25 36 x2
234: 02812d03 lw x26 40 x2
238: 02c12d83 lw x27 44 x2
23c: 03012083 lw x1 48 x2
240: 03410113 addi x2 x2 52
244: 00008067 jalr x0 x1 0
00000248 <fadd32>:
248: fcc10113 addi x2 x2 -52
24c: 00812023 sw x8 0 x2
250: 00912223 sw x9 4 x2
254: 01212423 sw x18 8 x2
258: 01312623 sw x19 12 x2
25c: 01412823 sw x20 16 x2
260: 01512a23 sw x21 20 x2
264: 01612c23 sw x22 24 x2
268: 01712e23 sw x23 28 x2
26c: 03812023 sw x24 32 x2
270: 03912223 sw x25 36 x2
274: 03a12423 sw x26 40 x2
278: 03b12623 sw x27 44 x2
27c: 02112823 sw x1 48 x2
280: 00050413 addi x8 x10 0
284: 00058493 addi x9 x11 0
288: 800002b7 lui x5 0x80000
28c: fff28293 addi x5 x5 -1
290: 00547333 and x6 x8 x5
294: 0054f3b3 and x7 x9 x5
298: 0063c863 blt x7 x6 16 <noswap>
29c: 00040293 addi x5 x8 0
2a0: 00048413 addi x8 x9 0
2a4: 00028493 addi x9 x5 0
000002a8 <noswap>:
2a8: 01f00293 addi x5 x0 31
2ac: 00545933 srl x18 x8 x5
2b0: 0054d9b3 srl x19 x9 x5
2b4: 008002b7 lui x5 0x800
2b8: fff28293 addi x5 x5 -1
2bc: 00800337 lui x6 0x800
2c0: 005473b3 and x7 x8 x5
2c4: 0063ea33 or x20 x7 x6
2c8: 0054f3b3 and x7 x9 x5
2cc: 0063eab3 or x21 x7 x6
2d0: 01700293 addi x5 x0 23
2d4: 0ff00313 addi x6 x0 255
2d8: 005453b3 srl x7 x8 x5
2dc: 0063fb33 and x22 x7 x6
2e0: 0054d3b3 srl x7 x9 x5
2e4: 0063fbb3 and x23 x7 x6
2e8: 417b02b3 sub x5 x22 x23
2ec: 01800313 addi x6 x0 24
2f0: 00534663 blt x6 x5 12 <setalign_1>
2f4: 00028c13 addi x24 x5 0
2f8: 0080006f jal x0 8 <setalign_exit>
000002fc <setalign_1>:
2fc: 00030c13 addi x24 x6 0
00000300 <setalign_exit>:
300: 018adab3 srl x21 x21 x24
304: 013962b3 or x5 x18 x19
308: 00029663 bne x5 x0 12 <setma_1>
30c: 015a0a33 add x20 x20 x21
310: 0080006f jal x0 8 <setma_exit>
00000314 <setma_1>:
314: 415a0a33 sub x20 x20 x21
00000318 <setma_exit>:
318: 000a0513 addi x10 x20 0
31c: 00000097 auipc x1 0x0 <start>
320: ce8080e7 jalr x1 x1 -792
324: 00050c93 addi x25 x10 0
328: 00000d13 addi x26 x0 0
32c: 00800293 addi x5 x0 8
330: 0192cc63 blt x5 x25 24 <shift_false>
334: 00800293 addi x5 x0 8
338: 41928d33 sub x26 x5 x25
33c: 01aa5a33 srl x20 x20 x26
340: 01ab0b33 add x22 x22 x26
344: 0140006f jal x0 20 <shift_exit>
00000348 <shift_false>:
348: 00800293 addi x5 x0 8
34c: 405c8d33 sub x26 x25 x5
350: 01aa1a33 sll x20 x20 x26
354: 41ab0b33 sub x22 x22 x26
00000358 <shift_exit>:
358: 800002b7 lui x5 0x80000
35c: 005472b3 and x5 x8 x5
360: 01700313 addi x6 x0 23
364: 006b1333 sll x6 x22 x6
368: 008003b7 lui x7 0x800
36c: fff38393 addi x7 x7 -1
370: 007a73b3 and x7 x20 x7
374: 0062e2b3 or x5 x5 x6
378: 0072e533 or x10 x5 x7
37c: 00012403 lw x8 0 x2
380: 00412483 lw x9 4 x2
384: 00812903 lw x18 8 x2
388: 00c12983 lw x19 12 x2
38c: 01012a03 lw x20 16 x2
390: 01412a83 lw x21 20 x2
394: 01812b03 lw x22 24 x2
398: 01c12b83 lw x23 28 x2
39c: 02012c03 lw x24 32 x2
3a0: 02412c83 lw x25 36 x2
3a4: 02812d03 lw x26 40 x2
3a8: 02c12d83 lw x27 44 x2
3ac: 03012083 lw x1 48 x2
3b0: 03410113 addi x2 x2 52
3b4: 00008067 jalr x0 x1 0
000003b8 <print_image>:
3b8: ff410113 addi x2 x2 -12
3bc: 00812023 sw x8 0 x2
3c0: 00912223 sw x9 4 x2
3c4: 01212423 sw x18 8 x2
3c8: 00000413 addi x8 x0 0
3cc: 00000493 addi x9 x0 0
3d0: 00500913 addi x18 x0 5
000003d4 <p_outer_loop>:
3d4: 05245463 bge x8 x18 72 <p_done>
3d8: 00000493 addi x9 x0 0
000003dc <p_inner_loop>:
3dc: 0324d463 bge x9 x18 40 <p_inner_done>
3e0: 0005a503 lw x10 0 x11
3e4: 00200893 addi x17 x0 2
3e8: 00000073 ecall
3ec: 02000513 addi x10 x0 32
3f0: 00b00893 addi x17 x0 11
3f4: 00000073 ecall
3f8: 00458593 addi x11 x11 4
3fc: 00148493 addi x9 x9 1
400: fddff06f jal x0 -36 <p_inner_loop>
00000404 <p_inner_done>:
404: 10000517 auipc x10 0x10000
408: bfc50513 addi x10 x10 -1028
40c: 00400893 addi x17 x0 4
410: 00000073 ecall
414: 00140413 addi x8 x8 1
418: fbdff06f jal x0 -68 <p_outer_loop>
0000041c <p_done>:
41c: 00012403 lw x8 0 x2
420: 00412483 lw x9 4 x2
424: 00812903 lw x18 8 x2
428: 00c10113 addi x2 x2 12
42c: 00008067 jalr x0 x1 0
00000430 <main>:
430: 10000517 auipc x10 0x10000
434: bd250513 addi x10 x10 -1070
438: 00052a03 lw x20 0 x10
43c: 00452a83 lw x21 4 x10
440: 10000517 auipc x10 0x10000
444: bd650513 addi x10 x10 -1066
448: 00052283 lw x5 0 x10
44c: 00452303 lw x6 4 x10
450: 00852383 lw x7 8 x10
454: 00c52e03 lw x28 12 x10
458: 10000b17 auipc x22 0x10000
45c: bceb0b13 addi x22 x22 -1074
460: 005b2023 sw x5 0 x22
464: 006b2823 sw x6 16 x22
468: 047b2823 sw x7 80 x22
46c: 07cb2023 sw x28 96 x22
470: 00100413 addi x8 x0 1
474: 00400493 addi x9 x0 4
00000478 <first_loop>:
478: 000b2503 lw x10 0 x22
47c: 10000f17 auipc x30 0x10000
480: b8ef0f13 addi x30 x30 -1138
484: 00300313 addi x6 x0 3
488: 40830333 sub x6 x6 x8
48c: 00231313 slli x6 x6 2
490: 006f0333 add x6 x30 x6
494: 00032583 lw x11 0 x6
498: 00000097 auipc x1 0x0 <start>
49c: c70080e7 jalr x1 x1 -912
4a0: 00050d93 addi x27 x10 0
4a4: 010b2503 lw x10 16 x22
4a8: 10000f17 auipc x30 0x10000
4ac: b62f0f13 addi x30 x30 -1182
4b0: fff40313 addi x6 x8 -1
4b4: 00231313 slli x6 x6 2
4b8: 006f0333 add x6 x30 x6
4bc: 00032583 lw x11 0 x6
4c0: 00000097 auipc x1 0x0 <start>
4c4: c48080e7 jalr x1 x1 -952
4c8: 000d8593 addi x11 x27 0
4cc: 00000097 auipc x1 0x0 <start>
4d0: d7c080e7 jalr x1 x1 -644
4d4: 00241313 slli x6 x8 2
4d8: 01630333 add x6 x6 x22
4dc: 00a32023 sw x10 0 x6
4e0: 050b2503 lw x10 80 x22
4e4: 10000f17 auipc x30 0x10000
4e8: b26f0f13 addi x30 x30 -1242
4ec: 00300313 addi x6 x0 3
4f0: 40830333 sub x6 x6 x8
4f4: 00231313 slli x6 x6 2
4f8: 006f0333 add x6 x30 x6
4fc: 00032583 lw x11 0 x6
500: 00000097 auipc x1 0x0 <start>
504: c08080e7 jalr x1 x1 -1016
508: 00050d93 addi x27 x10 0
50c: 060b2503 lw x10 96 x22
510: 10000f17 auipc x30 0x10000
514: afaf0f13 addi x30 x30 -1286
518: fff40313 addi x6 x8 -1
51c: 00231313 slli x6 x6 2
520: 006f0333 add x6 x30 x6
524: 00032583 lw x11 0 x6
528: 00000097 auipc x1 0x0 <start>
52c: be0080e7 jalr x1 x1 -1056
530: 000d8593 addi x11 x27 0
534: 00000097 auipc x1 0x0 <start>
538: d14080e7 jalr x1 x1 -748
53c: 00241313 slli x6 x8 2
540: 01630333 add x6 x6 x22
544: 04a32823 sw x10 80 x6
548: 00140413 addi x8 x8 1
54c: f29446e3 blt x8 x9 -212 <first_loop>
550: 00100413 addi x8 x0 1
554: 00400913 addi x18 x0 4
558: 00500993 addi x19 x0 5
0000055c <second_outloop>:
55c: 00000493 addi x9 x0 0
00000560 <second_inloop>:
560: 00249293 slli x5 x9 2
564: 016282b3 add x5 x5 x22
568: 0002a503 lw x10 0 x5
56c: 10000f17 auipc x30 0x10000
570: a9ef0f13 addi x30 x30 -1378
574: 00300313 addi x6 x0 3
578: 40830333 sub x6 x6 x8
57c: 00231313 slli x6 x6 2
580: 006f0333 add x6 x30 x6
584: 00032583 lw x11 0 x6
588: 00000097 auipc x1 0x0 <start>
58c: b80080e7 jalr x1 x1 -1152
590: 00050d93 addi x27 x10 0
594: 00249293 slli x5 x9 2
598: 016282b3 add x5 x5 x22
59c: 0502a503 lw x10 80 x5
5a0: 10000f17 auipc x30 0x10000
5a4: a6af0f13 addi x30 x30 -1430
5a8: fff40313 addi x6 x8 -1
5ac: 00231313 slli x6 x6 2
5b0: 006f0333 add x6 x30 x6
5b4: 00032583 lw x11 0 x6
5b8: 00000097 auipc x1 0x0 <start>
5bc: b50080e7 jalr x1 x1 -1200
5c0: 000d8593 addi x11 x27 0
5c4: 00000097 auipc x1 0x0 <start>
5c8: c84080e7 jalr x1 x1 -892
5cc: 00241313 slli x6 x8 2
5d0: 00830333 add x6 x6 x8
5d4: 00930333 add x6 x6 x9
5d8: 00231313 slli x6 x6 2
5dc: 01630333 add x6 x6 x22
5e0: 00a32023 sw x10 0 x6
5e4: 00148493 addi x9 x9 1
5e8: f734cce3 blt x9 x19 -136 <second_inloop>
5ec: 00140413 addi x8 x8 1
5f0: f72446e3 blt x8 x18 -148 <second_outloop>
5f4: 10000597 auipc x11 0x10000
5f8: a3258593 addi x11 x11 -1486
5fc: 00000097 auipc x1 0x0 <start>
600: dbc080e7 jalr x1 x1 -580
604: 00a00893 addi x17 x0 10
608: 00000073 ecall
R-type instructions do not need to read or write memory, so the result Res (0x00000002) simply passes through this stage and goes on to the WB stage.
After all these stages are done, the register is updated like this:
Bilinear interpolation - Wikipedia
Detailed Explanation of Bilinear Interpolation for Image Scaling
or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up