# llvm-epi trace
## new compile flag
``` c=
// Hidden boolean command-line option `-vectorizer-use-vp-strided-load-store`.
// Defaults to false; per its description it switches the vectorizer to VPred
// strided vector loads/stores and is marked experimental.
static cl::opt<bool> UseStridedAccesses(
"vectorizer-use-vp-strided-load-store",
cl::init(false),
cl::Hidden,
cl::desc("Use VPred strided vector load store. This is EXPERIMENTAL"));
```
```c=
// Hidden unsigned command-line option `-vector-register-width-factor`.
// Defaults to 1; per its description it is the multiple of the minimum vector
// register width to use on targets with variable-width vector registers.
static cl::opt<unsigned> VectorRegisterWidthFactor(
"vector-register-width-factor", cl::init(1), cl::Hidden,
cl::desc("On targets that support variable width for vector registers, "
"value by which the vector register width is a multiple of "
"minimum vector register width."));
```
## Test cases in [Auto-vectorization in GCC](https://gcc.gnu.org/projects/tree-ssa/vectorization.html)
(D) = different, (S) = same (no significant difference)
7 of the 24 test cases show some difference in vectorization.
In some cases the LMUL is different (e.g. case 11).
Compiler:
LLVM version: LLVM-EPI (73ad1d60)
Compile options:
```bash
# Step 1: clang emits LLVM IR (main.ll) from main.c.
clang \
--target=riscv64-unknown-elf \
-march=rv64gcv0p10 \
-menable-experimental-extensions \
-O0 -Xclang -disable-O0-optnone \
-fno-builtin \
-S -emit-llvm main.c -o main.ll
# Step 2: opt runs the -O3 pipeline with scalable vectorization preferred.
# Note the space before each trailing backslash: without it the shell glues
# the option to the one on the following line.
opt \
-O3 \
-scalable-vectorization=preferred \
-riscv-v-vector-bits-min=256 \
-mtriple=riscv64-linux-gnu \
-mattr=+experimental-v \
-S main.ll -o main.opt.ll
# Step 3: llc lowers the optimized IR to RISC-V assembly (main.s).
llc \
-O3 --riscv-v-vector-bits-min=256 \
-mtriple=riscv64-linux-gnu \
-mattr=+experimental-v \
-verify-machineinstrs \
main.opt.ll -o main.s
```
1. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 1): a[i] = b[i] + c[i] over 256 32-bit elements.
# Structure: vector main loop (two e32/m4 register groups per iteration)
# followed by a scalar remainder loop. Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
# Prologue: 16-byte frame, save ra/s0, s0 = frame pointer.
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# t0 = 2 * vlenb = number of 32-bit elements consumed per vector iteration.
# Take the vector path only when the trip count (256) is >= t0.
csrr a7, vlenb
slli t0, a7, 1
addi a0, zero, 256
bgeu a0, t0, .LBB0_2
# %bb.1:
# Scalar-only path: start the scalar loop at element 0.
mv t1, zero
j .LBB0_5
.LBB0_2: # %vector.ph
# a6 = 256 % t0 (scalar remainder), t1 = element count done by the vector loop.
# a3 = running byte offset, a4 = running element count.
# t3/a1/a2 = &a/&c/&b; t2/t4/a0 = the same bases + 4*vlenb (second group).
# a7 becomes 8*vlenb = byte stride per vector iteration.
mv a3, zero
mv a4, zero
remu a6, a0, t0
sub t1, a0, a6
lui a0, %hi(a)
addi t3, a0, %lo(a)
slli a0, a7, 2
add t2, a0, t3
slli a7, a7, 3
lui a1, %hi(c)
addi a1, a1, %lo(c)
add t4, a0, a1
lui a2, %hi(b)
addi a2, a2, %lo(b)
add a0, a0, a2
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
# Load two groups from b (v8, v12) and c (v16, v20), add, store to a.
add a5, a3, a2
vl4re32.v v8, (a5)
add a5, a0, a3
vl4re32.v v12, (a5)
add a5, a3, a1
vl4re32.v v16, (a5)
add a5, t4, a3
vl4re32.v v20, (a5)
vsetvli a5, zero, e32, m4, ta, mu
vadd.vv v8, v16, v8
vadd.vv v12, v20, v12
add a5, a3, t3
vs4r.v v8, (a5)
add a5, t2, a3
vs4r.v v12, (a5)
add a4, a4, t0
add a3, a3, a7
bne a4, t1, .LBB0_3
# %bb.4: # %middle.block
# No remainder -> done.
beqz a6, .LBB0_7
.LBB0_5: # %for.body.preheader
# Scalar remainder setup: a0 = t1 - 256 (negative counter that counts up),
# a1/a2/a3 = &a[t1], &c[t1], &b[t1].
addi a0, t1, -256
lui a1, %hi(a)
addi a1, a1, %lo(a)
slli a3, t1, 2
add a1, a1, a3
lui a2, %hi(c)
addi a2, a2, %lo(c)
add a2, a2, a3
lui a4, %hi(b)
addi a4, a4, %lo(b)
add a3, a3, a4
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
# One element per iteration; a4 keeps the previous counter value so the
# unsigned compare below exits once the counter wraps around to 0.
lw a6, 0(a3)
lw a5, 0(a2)
mv a4, a0
addw a0, a5, a6
sw a0, 0(a1)
addi a0, a4, 1
addi a1, a1, 4
addi a2, a2, 4
addi a3, a3, 4
bgeu a0, a4, .LBB0_6
.LBB0_7: # %for.end
# Return 0 and restore the frame.
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type b,@object # @b
.bss
.globl b
.p2align 2
b:
.zero 1024
.size b, 1024
.type c,@object # @c
.globl c
.p2align 2
c:
.zero 1024
.size c, 1024
.type a,@object # @a
.globl a
.p2align 2
a:
.zero 1024
.size a, 1024
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
2. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 2): two loops, both vectorized with scalar remainders.
# a0 and a1 are read before being written, i.e. they are incoming arguments
# (a0 = count n, a1 = fill value x).
#   Loop 1: b[i] = x for i in [0, n)            (skipped when n <= 0)
#   Loop 2: a[i] = b[i] & c[i], n iterations, continuing from the index
#           where loop 1 stopped.
# Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
csrr t0, vlenb
blez a0, .LBB0_3
# %bb.1: # %for.body.preheader
# t1 = n zero-extended from 32 bits; a7 = 2*vlenb = elements per vector iteration.
slli a2, a0, 32
srli t1, a2, 32
slli a7, t0, 1
bgeu t1, a7, .LBB0_4
# %bb.2:
# Too few elements for the vector loop: scalar loop starts at 0.
mv t2, zero
j .LBB0_7
.LBB0_3:
# n <= 0: loop 1 is skipped entirely, index after loop 1 is 0.
mv t1, zero
j .LBB0_9
.LBB0_4: # %vector.ph
# a6 = remainder, t2 = elements handled by the vector loop.
# v8 = x broadcast to every element; t3 = 4*vlenb, a5 = 8*vlenb byte strides.
mv a3, zero
remu a6, t1, a7
sub t2, t1, a6
vsetvli a2, zero, e32, m4, ta, mu
vmv.v.x v8, a1
slli t3, t0, 2
lui a4, %hi(b)
addi a4, a4, %lo(b)
slli a5, t0, 3
.LBB0_5: # %vector.body
# =>This Inner Loop Header: Depth=1
# Store the broadcast vector twice per iteration (two m4 groups).
vs4r.v v8, (a4)
add a2, a4, t3
vs4r.v v8, (a2)
add a3, a3, a7
add a4, a4, a5
bne a3, t2, .LBB0_5
# %bb.6: # %middle.block
beqz a6, .LBB0_9
.LBB0_7: # %for.body.preheader35
# Scalar remainder of loop 1: a2 = elements left, a3 = &b[t2].
sub a2, t1, t2
lui a3, %hi(b)
addi a3, a3, %lo(b)
slli a4, t2, 2
add a3, a3, a4
.LBB0_8: # %for.body
# =>This Inner Loop Header: Depth=1
sw a1, 0(a3)
addi a2, a2, -1
addi a3, a3, 4
bnez a2, .LBB0_8
.LBB0_9: # %while.cond.preheader
# Loop 2 entry: nothing to do when n == 0.
beqz a0, .LBB0_17
# %bb.10: # %while.body.preheader
# a4 = zext32(n - 1) + 1 = trip count of loop 2; t2 = 2*vlenb.
addiw a1, a0, -1
slli a1, a1, 32
srli a1, a1, 32
addi a4, a1, 1
slli t2, t0, 1
bgeu a4, t2, .LBB0_12
# %bb.11:
# Scalar-only: a7 = starting element index (t1).
mv a7, t1
j .LBB0_15
.LBB0_12: # %vector.ph21
# a6 = remainder, t3 = elements handled by the vector loop.
# a7 = index where the scalar remainder will start, a0 = remaining count.
# a1 = starting byte offset (t1 * 4); t0 becomes 8*vlenb byte stride.
# t5/t6/a4 = &a/&c/&b; t4/t1/a2 = same bases + 4*vlenb (second group).
mv a5, zero
remu a6, a4, t2
sub t3, a4, a6
add a7, t1, t3
subw a0, a0, t3
lui a1, %hi(a)
addi t5, a1, %lo(a)
slli a2, t0, 2
add t4, a2, t5
slli a1, t1, 2
slli t0, t0, 3
lui a3, %hi(c)
addi t6, a3, %lo(c)
add t1, a2, t6
lui a4, %hi(b)
addi a4, a4, %lo(b)
add a2, a2, a4
.LBB0_13: # %vector.body19
# =>This Inner Loop Header: Depth=1
# Load two groups from b and c, bitwise-AND them, store to a.
add a3, a1, a4
vl4re32.v v8, (a3)
add a3, a2, a1
vl4re32.v v12, (a3)
add a3, a1, t6
vl4re32.v v16, (a3)
add a3, t1, a1
vl4re32.v v20, (a3)
vsetvli a3, zero, e32, m4, ta, mu
vand.vv v8, v16, v8
vand.vv v12, v20, v12
add a3, a1, t5
vs4r.v v8, (a3)
add a3, t4, a1
vs4r.v v12, (a3)
add a5, a5, t2
add a1, a1, t0
bne a5, t3, .LBB0_13
# %bb.14: # %middle.block17
beqz a6, .LBB0_17
.LBB0_15: # %while.body.preheader34
# Scalar remainder of loop 2: a1/a2/a3 = &a[a7], &c[a7], &b[a7].
lui a1, %hi(a)
addi a1, a1, %lo(a)
slli a3, a7, 2
add a1, a1, a3
lui a2, %hi(c)
addi a2, a2, %lo(c)
add a2, a2, a3
lui a4, %hi(b)
addi a4, a4, %lo(b)
add a3, a3, a4
.LBB0_16: # %while.body
# =>This Inner Loop Header: Depth=1
# a0 counts down to 0.
lw a4, 0(a3)
lw a5, 0(a2)
addiw a0, a0, -1
and a4, a4, a5
sw a4, 0(a1)
addi a1, a1, 4
addi a2, a2, 4
addi a3, a3, 4
bnez a0, .LBB0_16
.LBB0_17: # %while.end
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type b,@object # @b
.bss
.globl b
.p2align 2
b:
.zero 1024
.size b, 1024
.type c,@object # @c
.globl c
.p2align 2
c:
.zero 1024
.size c, 1024
.type a,@object # @a
.globl a
.p2align 2
a:
.zero 1024
.size a, 1024
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
3. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 3): copies a0 32-bit words from the buffer at a2 to the
# buffer at a1 (a0/a1/a2 are read before being written, i.e. incoming
# arguments). Vector main loop plus scalar remainder. Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# Count of 0 -> nothing to copy.
beqz a0, .LBB0_7
# %bb.1: # %while.body.preheader
# a4 = zext32(count - 1) + 1 = trip count; t0 = 2*vlenb = elements per
# vector iteration.
addiw a3, a0, -1
slli a3, a3, 32
srli a3, a3, 32
addi a4, a3, 1
csrr a7, vlenb
slli t0, a7, 1
bgeu a4, t0, .LBB0_3
# %bb.2:
# Scalar-only: t1 = source cursor, t5 = destination cursor.
mv t1, a2
mv t5, a1
j .LBB0_6
.LBB0_3: # %vector.ph
# a6 = remainder, t2 = elements handled by the vector loop.
# t1/t5 = source/destination cursors for the scalar remainder,
# a0 = remaining element count after the vector loop.
# t3 = dst + 4*vlenb, t4 = src + 4*vlenb (second group); a7 = 8*vlenb stride.
mv a5, zero
mv a3, zero
remu a6, a4, t0
sub t2, a4, a6
slli a4, t2, 2
add t1, a2, a4
add t5, a1, a4
subw a0, a0, t2
slli t4, a7, 2
add t3, a1, t4
slli a7, a7, 3
add t4, t4, a2
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
# Whole-register loads from the source, whole-register stores to the destination.
add a4, a2, a5
vl4re32.v v8, (a4)
add a4, t4, a5
vl4re32.v v12, (a4)
add a4, a1, a5
vs4r.v v8, (a4)
add a4, t3, a5
vs4r.v v12, (a4)
add a3, a3, t0
add a5, a5, a7
bne a3, t2, .LBB0_4
# %bb.5: # %middle.block
beqz a6, .LBB0_7
.LBB0_6: # %while.body
# =>This Inner Loop Header: Depth=1
# Scalar remainder: one word per iteration, a0 counts down to 0.
lw a1, 0(t1)
addiw a0, a0, -1
addi t1, t1, 4
addi a2, t5, 4
sw a1, 0(t5)
mv t5, a2
bnez a0, .LBB0_6
.LBB0_7: # %while.end
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
4. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 4): for a0 32-bit words, stores (source word + 5) from the
# buffer at a2 into the buffer at a1 (a0/a1/a2 are read before being written,
# i.e. incoming arguments). Vector main loop plus scalar remainder.
# Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# Count of 0 -> nothing to do.
beqz a0, .LBB0_7
# %bb.1: # %while.body.preheader
# a4 = zext32(count - 1) + 1 = trip count; a7 = 2*vlenb = elements per
# vector iteration.
addiw a3, a0, -1
slli a3, a3, 32
srli a3, a3, 32
addi a4, a3, 1
csrr t0, vlenb
slli a7, t0, 1
bgeu a4, a7, .LBB0_3
# %bb.2:
# Scalar-only: t1 = source cursor, t5 = destination cursor.
mv t1, a2
mv t5, a1
j .LBB0_6
.LBB0_3: # %vector.ph
# a6 = remainder, t2 = elements handled by the vector loop,
# a0 = remaining count, t1/t5 = cursors for the scalar remainder.
# t3 = dst + 4*vlenb, t4 = src + 4*vlenb (second group); t0 = 8*vlenb stride.
mv a5, zero
mv a3, zero
remu a6, a4, a7
sub t2, a4, a6
subw a0, a0, t2
slli a4, t2, 2
add t1, a2, a4
add t5, a1, a4
slli t4, t0, 2
add t3, a1, t4
slli t0, t0, 3
add t4, t4, a2
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
# Load two groups, add the immediate 5 to every element, store.
add a4, a2, a5
vl4re32.v v8, (a4)
add a4, t4, a5
vl4re32.v v12, (a4)
vsetvli a4, zero, e32, m4, ta, mu
vadd.vi v8, v8, 5
vadd.vi v12, v12, 5
add a4, a1, a5
vs4r.v v8, (a4)
add a4, t3, a5
vs4r.v v12, (a4)
add a3, a3, a7
add a5, a5, t0
bne a3, t2, .LBB0_4
# %bb.5: # %middle.block
beqz a6, .LBB0_7
.LBB0_6: # %while.body
# =>This Inner Loop Header: Depth=1
# Scalar remainder: one word per iteration, a0 counts down to 0.
lw a1, 0(t1)
addiw a0, a0, -1
addi t1, t1, 4
addiw a1, a1, 5
addi a2, t5, 4
sw a1, 0(t5)
mv t5, a2
bnez a0, .LBB0_6
.LBB0_7: # %for.end19
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type b,@object # @b
.bss
.globl b
.p2align 2
b:
.zero 1024
.size b, 1024
.type c,@object # @c
.globl c
.p2align 2
c:
.zero 1024
.size c, 1024
.type a,@object # @a
.globl a
.p2align 2
a:
.zero 1024
.size a, 1024
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
5. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 5): stores the constant 5 into 10000 consecutive 32-bit
# words starting at symbol s. Vector main loop plus scalar remainder.
# a0 is not set to a return value before ret.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# a7 = 2*vlenb = elements per vector iteration; a1 = (2 << 12) + 1808 = 10000.
csrr a2, vlenb
slli a7, a2, 1
lui a1, 2
addiw a1, a1, 1808
bgeu a1, a7, .LBB0_2
# %bb.1:
# Scalar-only: start at element 0.
mv a1, zero
j .LBB0_5
.LBB0_2: # %vector.ph
# a6 = remainder, a1 = elements handled by the vector loop.
# a5 = 4*vlenb (offset of second group), a2 = 8*vlenb (stride), v8 = splat(5).
mv a4, zero
remu a6, a1, a7
sub a1, a1, a6
slli a5, a2, 2
lui a3, %hi(s)
addi a3, a3, %lo(s)
slli a2, a2, 3
vsetvli a0, zero, e32, m4, ta, mu
vmv.v.i v8, 5
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
vs4r.v v8, (a3)
add a0, a3, a5
vs4r.v v8, (a0)
add a4, a4, a7
add a3, a3, a2
bne a4, a1, .LBB0_3
# %bb.4: # %middle.block
beqz a6, .LBB0_7
.LBB0_5: # %for.body.preheader
# Scalar remainder: a0 = a1 - 10000 (lui/addiw build -10000), a negative
# counter that counts up; a1 = &s[a1]; a2 = value 5.
lui a0, 1048574
addiw a0, a0, -1808
add a0, a0, a1
lui a2, %hi(s)
addi a2, a2, %lo(s)
slli a1, a1, 2
add a1, a1, a2
addi a2, zero, 5
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
# a3 keeps the previous counter; the unsigned compare exits when the
# counter wraps around to 0.
mv a3, a0
sw a2, 0(a1)
addi a0, a0, 1
addi a1, a1, 4
bgeu a0, a3, .LBB0_6
.LBB0_7: # %for.end
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type s,@object # @s
.bss
.globl s
.p2align 2
s:
.zero 40000
.size s, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
7. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 7): a[i] = b[i + a0] for 10000 32-bit elements, where a0 is
# read before being written (incoming argument used as an element offset).
# Vector main loop plus scalar remainder. Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# t0 = 2*vlenb = elements per vector iteration; a2 = (2 << 12) + 1808 = 10000.
csrr a7, vlenb
slli t0, a7, 1
lui a1, 2
addiw a2, a1, 1808
bgeu a2, t0, .LBB0_2
# %bb.1:
# Scalar-only: start at element 0.
mv t1, zero
j .LBB0_5
.LBB0_2: # %vector.ph
# a6 = remainder, t1 = elements handled by the vector loop.
# t2 = &a, a3 = &a + 4*vlenb; t4 = &b[a0], t3 = &b[a0 + vlenb];
# a1 = 8*vlenb byte stride; a5 = running byte offset, a4 = element count.
mv a5, zero
mv a4, zero
remu a6, a2, t0
sub t1, a2, a6
lui a1, %hi(a)
addi t2, a1, %lo(a)
add a1, a0, a7
slli a1, a1, 2
lui a2, %hi(b)
addi a2, a2, %lo(b)
add t3, a1, a2
slli a1, a0, 2
add t4, a1, a2
slli a1, a7, 2
add a3, a1, t2
slli a1, a7, 3
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
# Load two groups from the offset source, store them to a.
add a2, t4, a5
vl4re32.v v8, (a2)
add a2, t3, a5
vl4re32.v v12, (a2)
add a2, a5, t2
vs4r.v v8, (a2)
add a2, a3, a5
vs4r.v v12, (a2)
add a4, a4, t0
add a5, a5, a1
bne a4, t1, .LBB0_3
# %bb.4: # %middle.block
beqz a6, .LBB0_7
.LBB0_5: # %for.body.preheader
# Scalar remainder: a1 = t1 - 10000 (negative up-counter),
# a3 = &a[t1], a0 = &b[a0 + t1].
lui a1, 1048574
addiw a1, a1, -1808
add a1, a1, t1
lui a2, %hi(a)
addi a2, a2, %lo(a)
slli a3, t1, 2
add a3, a3, a2
add a0, a0, t1
slli a0, a0, 2
lui a2, %hi(b)
addi a2, a2, %lo(b)
add a0, a0, a2
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
# a4 keeps the previous counter; loop exits when the counter wraps to 0.
lw a2, 0(a0)
mv a4, a1
sw a2, 0(a3)
addi a1, a1, 1
addi a3, a3, 4
addi a0, a0, 4
bgeu a1, a4, .LBB0_6
.LBB0_7: # %for.end
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type b,@object # @b
.bss
.globl b
.p2align 2
b:
.zero 1024
.size b, 1024
.type a,@object # @a
.globl a
.p2align 2
a:
.zero 1024
.size a, 1024
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
8. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 8): nested loops filling a with the value in a0 (read before
# being written, i.e. incoming argument): 10000 outer iterations, each writing
# 10000 32-bit words, with the row pointer advancing 40000 bytes per outer
# iteration. Inner loop is vectorized with a scalar remainder. Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# Loop-invariant setup, hoisted out of both loops:
#   t3 = outer counter, t5 = 2*vlenb (elements per vector iteration),
#   t2 = 10000, a7 = 1 if 10000 < t5 (vector loop unusable),
#   v8 = splat(a0), t4 = current row pointer (starts at &a),
#   a6 = -10000, t1 = (10 << 12) - 960 = 40000 (row stride in bytes),
#   t0 = 10000 % t5 (remainder), a4 = elements done by the vector loop,
#   t6 = 4*vlenb, a1 = 8*vlenb.
mv t3, zero
csrr a1, vlenb
slli t5, a1, 1
lui a2, 2
addiw t2, a2, 1808
sltu a7, t2, t5
vsetvli a2, zero, e32, m4, ta, mu
vmv.v.x v8, a0
lui a2, %hi(a)
addi t4, a2, %lo(a)
lui a2, 1048574
addiw a6, a2, -1808
lui a2, 10
addiw t1, a2, -960
remu t0, t2, t5
sub a4, t2, t0
slli t6, a1, 2
slli a1, a1, 3
j .LBB0_2
.LBB0_1: # %for.inc6
# in Loop: Header=BB0_2 Depth=1
# Advance to the next row; stop after 10000 rows.
addi t3, t3, 1
add t4, t4, t1
beq t3, t2, .LBB0_8
.LBB0_2: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB0_4 Depth 2
# Child Loop BB0_7 Depth 2
# a5 = starting element for the scalar loop (0 when the vector loop is skipped).
mv a5, zero
bnez a7, .LBB0_6
# %bb.3: # %vector.ph
# in Loop: Header=BB0_2 Depth=1
mv a2, zero
mv a5, t4
.LBB0_4: # %vector.body
# Parent Loop BB0_2 Depth=1
# => This Inner Loop Header: Depth=2
# Two whole-register-group stores of the broadcast value per iteration.
vs4r.v v8, (a5)
add a3, a5, t6
vs4r.v v8, (a3)
add a2, a2, t5
add a5, a5, a1
bne a2, a4, .LBB0_4
# %bb.5: # %middle.block
# in Loop: Header=BB0_2 Depth=1
# Scalar loop (if any) resumes at element a4.
mv a5, a4
beqz t0, .LBB0_1
.LBB0_6: # %for.body3.preheader
# in Loop: Header=BB0_2 Depth=1
# a2 = a5 - 10000 (negative up-counter), a5 = &row[a5].
add a2, a5, a6
slli a3, a5, 2
add a5, t4, a3
.LBB0_7: # %for.body3
# Parent Loop BB0_2 Depth=1
# => This Inner Loop Header: Depth=2
# a3 keeps the previous counter; loop exits when the counter wraps to 0.
mv a3, a2
sw a0, 0(a5)
addi a2, a2, 1
addi a5, a5, 4
bgeu a2, a3, .LBB0_7
j .LBB0_1
.LBB0_8: # %for.end8
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 400000000
.size a, 400000000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
9. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 9): sum reduction. Loads udiff, adds (ub[i] - uc[i]) for
# 10000 32-bit elements, and stores the result back to udiff.
# The vector loop keeps two partial-sum accumulators (v12 and v8) that are
# combined and reduced in the middle block. Returns 0 in a0.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# t2 = initial udiff; t0 = 2*vlenb = elements per vector iteration;
# a1 = (2 << 12) + 1808 = 10000.
lui a0, %hi(udiff)
lwu t2, %lo(udiff)(a0)
csrr a7, vlenb
slli t0, a7, 1
lui a1, 2
addiw a1, a1, 1808
bgeu a1, t0, .LBB0_2
# %bb.1:
# Scalar-only: start at element 0 with t2 as the running sum.
mv t1, zero
j .LBB0_5
.LBB0_2: # %vector.ph
# a6 = remainder, t1 = elements handled by the vector loop.
# v8 = all zeros; v12 = zeros with element 0 set to the initial udiff (t2),
# so the starting value enters the reduction exactly once.
# t3 = &uc, t2 = &uc + 4*vlenb; a2 = &ub, a4 = &ub + 4*vlenb; a1 = 8*vlenb.
mv a5, zero
mv a3, zero
remu a6, a1, t0
sub t1, a1, a6
vsetvli a1, zero, e32, m4, ta, mu
vmv.v.i v8, 0
vsetvli zero, zero, e32, m4, tu, mu
vmv4r.v v12, v8
vmv.s.x v12, t2
lui a0, %hi(uc)
addi t3, a0, %lo(uc)
slli a4, a7, 2
add t2, a4, t3
slli a1, a7, 3
lui a2, %hi(ub)
addi a2, a2, %lo(ub)
add a4, a4, a2
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
# v16/v20 = ub groups, v24/v28 = uc groups; accumulate the differences.
add a0, a5, a2
vl4re32.v v16, (a0)
add a0, a4, a5
vl4re32.v v20, (a0)
add a0, a5, t3
vl4re32.v v24, (a0)
add a0, t2, a5
vl4re32.v v28, (a0)
vsetvli zero, zero, e32, m4, ta, mu
vsub.vv v16, v16, v24
vsub.vv v20, v20, v28
vadd.vv v12, v16, v12
vadd.vv v8, v20, v8
add a3, a3, t0
add a5, a5, a1
bne a3, t1, .LBB0_3
# %bb.4: # %middle.block
# Combine both accumulators, then reduce to a scalar (seeded with 0) in t2.
vadd.vv v8, v8, v12
vsetvli a0, zero, e32, m1, ta, mu
vmv.v.i v12, 0
vsetvli a0, zero, e32, m4, ta, mu
vredsum.vs v8, v8, v12
vmv.x.s t2, v8
beqz a6, .LBB0_7
.LBB0_5: # %for.body.preheader
# Scalar remainder: a1 = t1 - 10000 (negative up-counter),
# a2 = &uc[t1], a3 = &ub[t1].
lui a1, 1048574
addiw a1, a1, -1808
add a1, a1, t1
lui a2, %hi(uc)
addi a2, a2, %lo(uc)
slli a3, t1, 2
add a2, a2, a3
lui a4, %hi(ub)
addi a4, a4, %lo(ub)
add a3, a3, a4
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
# t2 += ub[i] - uc[i]; a0 keeps the previous counter for the wrap check.
lw a4, 0(a3)
lw a5, 0(a2)
mv a0, a1
subw a1, a4, a5
addw t2, a1, t2
addi a1, a0, 1
addi a2, a2, 4
addi a3, a3, 4
bgeu a1, a0, .LBB0_6
.LBB0_7: # %for.end
# Write the final sum back to udiff and return 0.
lui a0, %hi(udiff)
sw t2, %lo(udiff)(a0)
mv a0, zero
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type udiff,@object # @udiff
.section .sbss,"aw",@nobits
.globl udiff
.p2align 2
udiff:
.word 0 # 0x0
.size udiff, 4
.type ub,@object # @ub
.bss
.globl ub
.p2align 2
ub:
.zero 40000
.size ub, 40000
.type uc,@object # @uc
.globl uc
.p2align 2
uc:
.zero 40000
.size uc, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
10. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_c2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo
.p2align 1
.type foo,@function
# foo (test case 10): works on six pointers loaded from globals
# (ia, ib, ic: 32-bit elements; sa, sb, sc: 16-bit elements).
#   Loop 1: ia[i] = ib[i] + ic[i] and sa[i] = sb[i] + sc[i], 10000 iterations.
#   Loop 2: ia[i] = sign-extended sb[i], 10000 iterations.
# Loop 1 has a runtime overlap check that selects a scalar or a vector
# version; the vector loops use a fixed vector length of 16 elements.
# This listing has no frame setup and does not set a return value.
foo:
# Load the six pointers: t0 = ib, t5 = ic, a6 = ia, a7 = sb, a4 = sc, a5 = sa.
lui a0, %hi(ib)
ld t0, %lo(ib)(a0)
lui a0, %hi(ic)
ld t5, %lo(ic)(a0)
lui a0, %hi(ia)
ld a6, %lo(ia)(a0)
lui a0, %hi(sb)
ld a7, %lo(sb)(a0)
lui a0, %hi(sc)
ld a4, %lo(sc)(a0)
lui a0, %hi(sa)
ld a5, %lo(sa)(a0)
# End addresses: +40000 bytes ((10 << 12) - 960) for the 32-bit arrays,
# +20000 bytes ((5 << 12) - 480) for the 16-bit arrays.
lui a0, 10
addiw a0, a0, -960
add a1, a6, a0
add t2, t0, a0
add t1, t5, a0
lui a0, 5
addiw a0, a0, -480
add a2, a5, a0
add t3, a7, a0
add t4, a4, a0
# Overlap test: a0 != 0 if ia overlaps ib or ic, or sa overlaps sb or sc
# (each pair checked as start_x < end_y && start_y < end_x).
sltu t2, a6, t2
sltu a0, t0, a1
and t2, t2, a0
sltu a0, a6, t1
sltu a1, t5, a1
and a0, a0, a1
or t1, t2, a0
sltu a1, a5, t3
sltu a0, a7, a2
and a0, a0, a1
or a0, t1, a0
sltu a1, a5, t4
sltu a2, a4, a2
and a1, a1, a2
or a0, a0, a1
# No overlap -> vector version of loop 1.
beqz a0, .LBB0_3
# Scalar version of loop 1: a0 = 10000 iterations, a1 = ia cursor, a2 = sb cursor.
lui a0, 2
addiw a0, a0, 1808
mv a1, a6
mv a2, a7
.LBB0_2:
lw t1, 0(t0)
lw a3, 0(t5)
addw a3, a3, t1
sw a3, 0(a1)
lh t1, 0(a2)
lh a3, 0(a4)
addw a3, a3, t1
sh a3, 0(a5)
addi a5, a5, 2
addi a4, a4, 2
addi a2, a2, 2
addi a1, a1, 4
addi t5, t5, 4
addi a0, a0, -1
addi t0, t0, 4
bnez a0, .LBB0_2
j .LBB0_5
.LBB0_3:
# Vector version of loop 1: a0 = 10000 elements, a1 = sb cursor, a2 = ia cursor.
lui a0, 2
addiw a0, a0, 1808
mv a1, a7
mv a2, a6
.LBB0_4:
# 16 elements per iteration: 32-bit add into ia, then 16-bit add into sa.
vsetivli zero, 16, e32, m2, ta, mu
vle32.v v8, (t0)
vle32.v v10, (t5)
vadd.vv v8, v10, v8
vse32.v v8, (a2)
vle16.v v8, (a1)
vle16.v v9, (a4)
vsetvli zero, zero, e16, m1, ta, mu
vadd.vv v8, v9, v8
vse16.v v8, (a5)
addi t0, t0, 64
addi t5, t5, 64
addi a2, a2, 64
addi a1, a1, 32
addi a4, a4, 32
addi a0, a0, -16
addi a5, a5, 32
bnez a0, .LBB0_4
.LBB0_5:
# Loop 2: a0 = 10000 elements; a7 (sb) and a6 (ia) still hold the original pointers.
lui a0, 2
addiw a0, a0, 1808
.LBB0_6:
# Load 16 halfwords from sb, sign-extend to 32 bits, store to ia.
vsetivli zero, 16, e16, m1, ta, mu
vle16.v v8, (a7)
vsetvli zero, zero, e32, m2, ta, mu
vsext.vf2 v10, v8
vse32.v v10, (a6)
addi a7, a7, 32
addi a0, a0, -16
addi a6, a6, 64
bnez a0, .LBB0_6
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
.type ib,@object
.section .sbss,"aw",@nobits
.globl ib
.p2align 3
ib:
.quad 0
.size ib, 8
.type ic,@object
.globl ic
.p2align 3
ic:
.quad 0
.size ic, 8
.type ia,@object
.globl ia
.p2align 3
ia:
.quad 0
.size ia, 8
.type sb,@object
.globl sb
.p2align 3
sb:
.quad 0
.size sb, 8
.type sc,@object
.globl sc
.p2align 3
sc:
.quad 0
.size sc, 8
.type sa,@object
.globl sa
.p2align 3
sa:
.quad 0
.size sa, 8
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
.addrsig
```
11. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 11): for 5000 iterations i, reading interleaved pairs from
# b and c (32-bit elements):
#   a[i] = b[2i+1]*c[2i+1] - b[2i]*c[2i]
#   d[i] = b[2i+1]*c[2i]   + b[2i]*c[2i+1]
# The vector loop builds 64-bit byte-offset index vectors (e64, m8) and uses
# indexed loads (vluxei64) to gather the even/odd elements; results are e32, m4.
# a0 is not set to a return value before ret.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# a7 = vlenb = iterations handled per vector iteration here;
# a0 = (1 << 12) + 904 = 5000.
csrr a7, vlenb
lui a0, 1
addiw a0, a0, 904
bgeu a0, a7, .LBB0_2
# %bb.1:
# Scalar-only: start at iteration 0.
mv t0, zero
j .LBB0_5
.LBB0_2: # %vector.ph
# a6 = remainder, t0 = iterations handled by the vector loop.
# v8 = 0,1,2,... (vid); t1 = 4*vlenb = output byte stride per iteration.
# a2 = &b, a0 = &c, t2 = &a, a5 = &d; a4 = iteration index, a3 = output byte offset.
mv a3, zero
mv a4, zero
remu a6, a0, a7
sub t0, a0, a6
vsetvli a0, zero, e64, m8, ta, mu
vid.v v8
slli t1, a7, 2
lui a0, %hi(b)
addi a2, a0, %lo(b)
lui a0, %hi(c)
addi a0, a0, %lo(c)
lui a1, %hi(a)
addi t2, a1, %lo(a)
lui a5, %hi(d)
addi a5, a5, %lo(d)
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
# v16 = (i << 3) = byte offset of element 2i; v24 = v16 | 4 = offset of 2i+1.
vsetvli zero, zero, e64, m8, ta, mu
vadd.vx v16, v8, a4
vsll.vi v16, v16, 3
vor.vi v24, v16, 4
vsetvli zero, zero, e32, m4, ta, mu
# Gathers: v0 = b[2i+1], v4 = c[2i+1], v24 = b[2i], v28 = c[2i].
vluxei64.v v0, (a2), v24
vluxei64.v v4, (a0), v24
vluxei64.v v24, (a2), v16
vluxei64.v v28, (a0), v16
# a[i] = v4*v0 - v28*v24
vmul.vv v16, v4, v0
vnmsac.vv v16, v28, v24
add a1, a3, t2
vs4r.v v16, (a1)
# d[i] = v28*v0 + v24*v4
vmul.vv v16, v28, v0
vmacc.vv v16, v24, v4
add a1, a3, a5
vs4r.v v16, (a1)
add a4, a4, a7
add a3, a3, t1
bne a4, t0, .LBB0_3
# %bb.4: # %middle.block
beqz a6, .LBB0_7
.LBB0_5: # %for.body.preheader
# Scalar remainder: a6 = t0 - 5000 (negative up-counter),
# t2 = &d[t0], a2 = &a[t0], a1 = &c[2*t0 + 1], a4 = &b[2*t0 + 1].
lui a0, 1048575
addiw a0, a0, -904
add a6, t0, a0
lui a0, %hi(d)
addi a0, a0, %lo(d)
slli a1, t0, 2
add t2, a1, a0
lui a2, %hi(a)
addi a2, a2, %lo(a)
add a2, a2, a1
slli a4, t0, 3
lui a1, %hi(c)
addi a1, a1, %lo(c)
add a1, a1, a4
addi a1, a1, 4
lui a5, %hi(b)
addi a5, a5, %lo(b)
add a4, a4, a5
addi a4, a4, 4
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
# a7 = b[2i+1], t0 = c[2i+1], a5 = b[2i], a3 = c[2i];
# t1 keeps the previous counter for the wrap-to-zero exit check.
lw a7, 0(a4)
lw t0, 0(a1)
lw a5, -4(a4)
lw a3, -4(a1)
mv t1, a6
mulw a6, t0, a7
mulw a0, a3, a5
subw a0, a6, a0
sw a0, 0(a2)
mulw a0, a5, t0
mulw a3, a3, a7
addw a0, a0, a3
sw a0, 0(t2)
addi a6, t1, 1
addi t2, t2, 4
addi a2, a2, 4
addi a1, a1, 8
addi a4, a4, 8
bgeu a6, t1, .LBB0_6
.LBB0_7: # %for.end
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type b,@object # @b
.bss
.globl b
.p2align 2
b:
.zero 40000
.size b, 40000
.type c,@object # @c
.globl c
.p2align 2
c:
.zero 40000
.size c, 40000
.type a,@object # @a
.globl a
.p2align 2
a:
.zero 40000
.size a, 40000
.type d,@object # @d
.globl d
.p2align 2
d:
.zero 40000
.size d, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
12. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 12): a[i] = i for 10000 32-bit elements.
# The vector loop keeps an index vector (initialized with vid) and bumps it
# by vlenb after each stored group. Scalar remainder follows.
# a0 is not set to a return value before ret.
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# a2 = vlenb (= elements per e32/m4 group), a7 = 2*vlenb = elements per
# vector iteration, t0 = (2 << 12) + 1808 = 10000.
csrr a2, vlenb
slli a7, a2, 1
lui a0, 2
addiw t0, a0, 1808
bgeu t0, a7, .LBB0_2
# %bb.1:
# Scalar-only: start at element 0.
mv a1, zero
j .LBB0_5
.LBB0_2: # %vector.ph
# a6 = remainder, a1 = elements handled by the vector loop.
# v8 = 0,1,2,...; t1 = 4*vlenb (second group offset), a0 = 8*vlenb (stride).
mv a5, zero
remu a6, t0, a7
sub a1, t0, a6
vsetvli a0, zero, e32, m4, ta, mu
vid.v v8
slli t1, a2, 2
lui a0, %hi(a)
addi a3, a0, %lo(a)
slli a0, a2, 3
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
# v12 = indices for the second group; after the stores v8 is advanced
# to the first group of the next iteration.
vadd.vx v12, v8, a2
vs4r.v v8, (a3)
add a4, a3, t1
vs4r.v v12, (a4)
add a5, a5, a7
vadd.vx v8, v12, a2
add a3, a3, a0
bne a5, a1, .LBB0_3
# %bb.4: # %middle.block
beqz a6, .LBB0_7
.LBB0_5: # %for.body.preheader
# Scalar remainder: a1 = current index, a0 = &a[a1].
lui a0, %hi(a)
addi a0, a0, %lo(a)
slli a2, a1, 2
add a0, a0, a2
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
sw a1, 0(a0)
addi a1, a1, 1
addi a0, a0, 4
bne a1, t0, .LBB0_6
.LBB0_7: # %for.end
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 40000
.size a, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
13. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
# foo (test case 13): nested loops with a strided reduction.
#   Outer loop: 10000 iterations (a0 = i), row pointers advance 40000 bytes.
#   Inner loop: 1250 steps with a stride of 8 elements (32 bytes), summing
#               a_row[j] - b_row[j] into s1.
#   After each outer iteration: out[i] = s1. After the last one, s1 is also
#   stored to diff.
# The vectorized inner loop uses 64-bit address vectors with indexed loads
# (vluxei64 with base `zero`) and spills heavily: the frame is 80 bytes plus
# a 60 * vlenb byte vector spill area addressed relative to s0.
foo: # @foo
# %bb.0: # %entry
# Prologue: save ra, s0-s6; s0 = frame pointer; reserve 60 * vlenb bytes.
addi sp, sp, -80
sd ra, 72(sp) # 8-byte Folded Spill
sd s0, 64(sp) # 8-byte Folded Spill
sd s1, 56(sp) # 8-byte Folded Spill
sd s2, 48(sp) # 8-byte Folded Spill
sd s3, 40(sp) # 8-byte Folded Spill
sd s4, 32(sp) # 8-byte Folded Spill
sd s5, 24(sp) # 8-byte Folded Spill
sd s6, 16(sp) # 8-byte Folded Spill
addi s0, sp, 80
csrr a0, vlenb
addi a1, zero, 60
mul a0, a0, a1
sub sp, sp, a0
# Loop-invariant setup:
#   a0 = outer counter, t4 = 2*vlenb (inner steps per vector iteration),
#   a3 = 1250 (inner trip count), a6 = 1 if 1250 < t4 (vector loop unusable),
#   s2 = &b, s3 = &a, t2 = 1249, a7 = &out,
#   s6 = (10 << 12) - 960 = 40000 (row stride in bytes),
#   t0 = (2 << 12) + 1808 = 10000 (outer trip count),
#   t1 = 1250 % t4 (remainder), s4 = steps done by the vector loop,
#   t3 = s4 * 8 (element index where the scalar loop resumes).
mv a0, zero
csrr a1, vlenb
slli t4, a1, 1
addi a3, zero, 1250
sltu a6, a3, t4
lui a2, %hi(b)
addi s2, a2, %lo(b)
lui a2, %hi(a)
addi s3, a2, %lo(a)
addi t2, zero, 1249
lui a2, %hi(out)
addi a7, a2, %lo(out)
lui a2, 10
addiw s6, a2, -960
lui a4, 2
addiw t0, a4, 1808
remu t1, a3, t4
sub s4, a3, t1
slli t3, s4, 3
# v16 = vid * 8 and v8 = (vid + vlenb) * 8: strided element indices for the
# two vector groups. Both, plus a zero vector (v24), are spilled for reuse.
vsetvli a3, zero, e64, m8, ta, mu
vid.v v8
vsll.vi v16, v8, 3
vadd.vx v8, v8, a1
vsll.vi v8, v8, 3
vsetvli zero, zero, e32, m4, ta, mu
vmv.v.i v24, 0
csrr a2, vlenb
addi a3, zero, 60
mul a2, a2, a3
sub a2, s0, a2
addi a2, a2, -80
vs4r.v v24, (a2) # Unknown-size Folded Spill
# s5 = 16 * vlenb = element-index advance per vector iteration;
# t5 / t6 = current row pointers into a / b.
slli s5, a1, 4
mv t5, s3
mv t6, s2
csrr a1, vlenb
addi a2, zero, 48
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v16, (a1) # Unknown-size Folded Spill
csrr a1, vlenb
addi a2, zero, 56
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v8, (a1) # Unknown-size Folded Spill
j .LBB0_2
.LBB0_1: # %for.end
# in Loop: Header=BB0_2 Depth=1
# End of one outer iteration: out[a0] = s1, advance both row pointers.
slli a1, a0, 2
add a1, a1, a7
sw s1, 0(a1)
addi a0, a0, 1
add t6, t6, s6
add t5, t5, s6
beq a0, t0, .LBB0_9
.LBB0_2: # %for.body
# =>This Loop Header: Depth=1
# Child Loop BB0_5 Depth 2
# Child Loop BB0_8 Depth 2
beqz a6, .LBB0_4
# %bb.3: # in Loop: Header=BB0_2 Depth=1
# Scalar-only: start at element 0 with an empty sum.
mv a4, zero
mv s1, zero
j .LBB0_7
.LBB0_4: # %vector.ph
# in Loop: Header=BB0_2 Depth=1
# s1 = running element index, a3 = step counter.
# v0 and v4 = the two partial-sum accumulators, both starting at zero.
mv s1, zero
mv a3, zero
vsetvli zero, zero, e32, m4, tu, mu
csrr a1, vlenb
addi a2, zero, 60
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vl4re8.v v0, (a1) # Unknown-size Folded Reload
vmv4r.v v4, v0
vmv.s.x v4, zero
.LBB0_5: # %vector.body
# Parent Loop BB0_2 Depth=1
# => This Inner Loop Header: Depth=2
# Build per-element addresses: (index + s1) << 2 plus a0 * 40000 plus the
# array base (s3 for a, s2 for b), for both vector groups.
vsetvli zero, zero, e64, m8, ta, mu
vadd.vx v24, v16, s1
csrr a1, vlenb
addi a2, zero, 56
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v8, (a1) # Unknown-size Folded Reload
vadd.vx v8, v8, s1
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v8, (a1) # Unknown-size Folded Spill
vmv.v.x v16, a0
vmul.vx v8, v16, s6
csrr a1, vlenb
addi a2, zero, 24
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v8, (a1) # Unknown-size Folded Spill
vadd.vx v8, v8, s3
vsll.vi v24, v24, 2
csrr a1, vlenb
slli a1, a1, 5
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v24, (a1) # Unknown-size Folded Spill
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v16, (a1) # Unknown-size Folded Reload
vsll.vi v16, v16, 2
vadd.vv v24, v8, v24
csrr a1, vlenb
slli a1, a1, 4
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v24, (a1) # Unknown-size Folded Spill
vadd.vv v8, v8, v16
csrr a1, vlenb
addi a2, zero, 40
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v8, (a1) # Unknown-size Folded Spill
# Gather the two groups from a (absolute addresses, base register zero).
vsetvli zero, zero, e32, m4, ta, mu
csrr a1, vlenb
slli a1, a1, 4
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v24, (a1) # Unknown-size Folded Reload
vluxei64.v v8, (zero), v24
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vs4r.v v8, (a1) # Unknown-size Folded Spill
csrr a1, vlenb
addi a2, zero, 40
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v24, (a1) # Unknown-size Folded Reload
vluxei64.v v8, (zero), v24
csrr a1, vlenb
slli a1, a1, 4
sub a1, s0, a1
addi a1, a1, -80
vs4r.v v8, (a1) # Unknown-size Folded Spill
# Same address computation against b's base (s2), then gather from b.
vsetvli zero, zero, e64, m8, ta, mu
csrr a1, vlenb
addi a2, zero, 24
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v8, (a1) # Unknown-size Folded Reload
vadd.vx v8, v8, s2
csrr a1, vlenb
slli a1, a1, 5
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v24, (a1) # Unknown-size Folded Reload
vadd.vv v24, v8, v24
vadd.vv v8, v8, v16
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v16, (zero), v24
vluxei64.v v20, (zero), v8
# Differences (a - b) for both groups, accumulated into v4 and v0.
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vl4re8.v v8, (a1) # Unknown-size Folded Reload
vsub.vv v8, v8, v16
csrr a1, vlenb
slli a1, a1, 4
sub a1, s0, a1
addi a1, a1, -80
vl4re8.v v12, (a1) # Unknown-size Folded Reload
vsub.vv v12, v12, v20
# Reload the first group's index vector into v16 for the next iteration.
csrr a1, vlenb
addi a2, zero, 48
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v16, (a1) # Unknown-size Folded Reload
vadd.vv v4, v8, v4
vadd.vv v0, v12, v0
add a3, a3, t4
add s1, s1, s5
bne a3, s4, .LBB0_5
# %bb.6: # %middle.block
# in Loop: Header=BB0_2 Depth=1
# Combine both accumulators and reduce to the scalar sum in s1;
# a4 = element index where the scalar remainder resumes.
vadd.vv v8, v0, v4
vsetvli a3, zero, e32, m1, ta, mu
vmv.v.i v12, 0
vsetvli a3, zero, e32, m4, ta, mu
vredsum.vs v8, v8, v12
vmv.x.s s1, v8
mv a4, t3
beqz t1, .LBB0_1
.LBB0_7: # %for.body3.preheader
# in Loop: Header=BB0_2 Depth=1
# Scalar remainder: a3 = element index - 8, a4 = &b_row[index], a5 = &a_row[index].
addi a3, a4, -8
slli a5, a4, 2
add a4, t6, a5
add a5, a5, t5
.LBB0_8: # %for.body3
# Parent Loop BB0_2 Depth=1
# => This Inner Loop Header: Depth=2
# s1 += a_row[j] - b_row[j]; j advances by 8 elements (32 bytes);
# continue while (j >> 3) < 1249.
lw a1, 0(a5)
lw a2, 0(a4)
subw a1, a1, a2
addw s1, s1, a1
addi a3, a3, 8
addi a4, a4, 32
srli a1, a3, 3
addi a5, a5, 32
bltu a1, t2, .LBB0_8
j .LBB0_1
.LBB0_9: # %for.end14
# Store the last sum to diff, release the vector spill area, restore
# callee-saved registers.
lui a0, %hi(diff)
sw s1, %lo(diff)(a0)
csrr a0, vlenb
addi a1, zero, 60
mul a0, a0, a1
add sp, sp, a0
ld s6, 16(sp) # 8-byte Folded Reload
ld s5, 24(sp) # 8-byte Folded Reload
ld s4, 32(sp) # 8-byte Folded Reload
ld s3, 40(sp) # 8-byte Folded Reload
ld s2, 48(sp) # 8-byte Folded Reload
ld s1, 56(sp) # 8-byte Folded Reload
ld s0, 64(sp) # 8-byte Folded Reload
ld ra, 72(sp) # 8-byte Folded Reload
addi sp, sp, 80
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type diff,@object # @diff
.section .sbss,"aw",@nobits
.globl diff
.p2align 2
diff:
.word 0 # 0x0
.size diff, 4
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 400000000
.size a, 400000000
.type b,@object # @b
.globl b
.p2align 2
b:
.zero 400000000
.size b, 400000000
.type out,@object # @out
.globl out
.p2align 2
out:
.zero 40000
.size out, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
14. (N)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 14): compiler-generated RV64 + V code for a three-level loop nest.
# As emitted below: for each outer index s6 in [0, 10000) the accumulator s1 is
# cleared; for each middle index s4 in [0, 10000) an inner reduction adds
# word(in + ((k + s6)*10000 + s4)*4) * word(coeff + (k*10000 + s4)*4) into s1
# for k in [0, 10000). s1 is stored to out[s6] and, on exit, to `sum`.
# The inner reduction has a vector path (indexed gathers feeding two vmacc
# accumulators) and a scalar loop used as remainder/fallback.
# Register roles established in the entry block:
#   s5 = vlenb, s2 = 2*vlenb (vector-loop step in elements)
#   t4 = 10000, t0 = -10000, s7 = 40000 (row stride in bytes)
#   t3 = (10000 < 2*vlenb): nonzero means the vector loop is skipped
#   t2 = 10000 % s2, s3 = 10000 - t2 (elements handled by the vector loop)
#   a6 = &coeff, a7 = &in, t1 = &out
# Vector spill slots live below the 96-byte scalar frame at s0 - 96 - k*vlenb.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
# Scalar frame: 96 bytes holding ra and s0-s7; s0 becomes the frame pointer.
addi sp, sp, -96
sd ra, 88(sp) # 8-byte Folded Spill
sd s0, 80(sp) # 8-byte Folded Spill
sd s1, 72(sp) # 8-byte Folded Spill
sd s2, 64(sp) # 8-byte Folded Spill
sd s3, 56(sp) # 8-byte Folded Spill
sd s4, 48(sp) # 8-byte Folded Spill
sd s5, 40(sp) # 8-byte Folded Spill
sd s6, 32(sp) # 8-byte Folded Spill
sd s7, 24(sp) # 8-byte Folded Spill
addi s0, sp, 96
# Reserve 76*vlenb bytes of stack for vector register spills.
csrr a0, vlenb
addi a1, zero, 76
mul a0, a0, a1
sub sp, sp, a0
mv s6, zero
csrr s5, vlenb
slli s2, s5, 1
# t4 = (2 << 12) + 1808 = 10000
lui a2, 2
addiw t4, a2, 1808
sltu t3, t4, s2
lui a2, %hi(coeff)
addi a6, a2, %lo(coeff)
lui a2, %hi(in)
addi a7, a2, %lo(in)
# t0 = -10000 (sign-extended 0xFFFFE000 - 1808)
lui a2, 1048574
addiw t0, a2, -1808
# s7 = (10 << 12) - 960 = 40000
lui a2, 10
addiw s7, a2, -960
lui a2, %hi(out)
addi t1, a2, %lo(out)
remu t2, t4, s2
sub s3, t4, t2
# Loop-invariant vectors are materialised once and spilled:
#   slot 72: vid (0,1,2,...) as e64/m8
vsetvli a2, zero, e64, m8, ta, mu
vid.v v8
csrr a0, vlenb
addi a1, zero, 72
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v8, (a0) # Unknown-size Folded Spill
#   slot 76: all-zero e32/m4 vector (accumulator seed)
vsetvli zero, zero, e32, m4, ta, mu
vmv.v.i v8, 0
csrr a0, vlenb
addi a1, zero, 76
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs4r.v v8, (a0) # Unknown-size Folded Spill
#   slot 56: splat(&in), slot 64: splat(&coeff), both e64/m8
vsetvli zero, zero, e64, m8, ta, mu
vmv.v.x v8, a7
vmv.v.x v16, a6
csrr a0, vlenb
addi a1, zero, 56
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v8, (a0) # Unknown-size Folded Spill
csrr a0, vlenb
slli a0, a0, 6
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v16, (a0) # Unknown-size Folded Spill
j .LBB0_2
# Outer-loop latch: out[s6] = s1, then s6++ and exit once s6 == 10000.
.LBB0_1: # %for.end16
# in Loop: Header=BB0_2 Depth=1
slli a0, s6, 2
add a0, a0, t1
addi s6, s6, 1
sw s1, 0(a0)
beq s6, t4, .LBB0_11
# Outer-loop header: reset middle index s4, accumulator s1 and the column
# pointers t5 (= &in) / t6 (= &coeff).
.LBB0_2: # %for.body
# =>This Loop Header: Depth=1
# Child Loop BB0_4 Depth 2
# Child Loop BB0_7 Depth 3
# Child Loop BB0_10 Depth 3
mv s4, zero
mv s1, zero
mv t5, a7
mv t6, a6
j .LBB0_4
# Middle-loop latch: s4++, advance both column pointers by one 4-byte element.
.LBB0_3: # %for.inc14
# in Loop: Header=BB0_4 Depth=2
addi s4, s4, 1
addi t6, t6, 4
addi t5, t5, 4
beq s4, t4, .LBB0_1
# Middle-loop header: choose vector path (t3 == 0) or scalar-only path.
.LBB0_4: # %for.cond4.preheader
# Parent Loop BB0_2 Depth=1
# => This Loop Header: Depth=2
# Child Loop BB0_7 Depth 3
# Child Loop BB0_10 Depth 3
beqz t3, .LBB0_6
# %bb.5: # in Loop: Header=BB0_4 Depth=2
mv a3, zero
j .LBB0_9
# Vector preheader: a2 = 0 (elements done); v28 = zero accumulator,
# v24 = zero accumulator with element 0 set to the running sum s1,
# v8 = index vector reloaded from slot 72.
.LBB0_6: # %vector.ph
# in Loop: Header=BB0_4 Depth=2
mv a2, zero
vsetvli zero, zero, e32, m4, tu, mu
csrr a0, vlenb
addi a1, zero, 76
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl4re8.v v28, (a0) # Unknown-size Folded Reload
vmv4r.v v24, v28
vmv.s.x v24, s1
csrr a0, vlenb
addi a1, zero, 72
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v8, (a0) # Unknown-size Folded Reload
# Vector body. Register pressure forces both accumulators (v24, v28) and
# every address vector through stack slots. Per iteration:
#   - build e64 address vectors idx*40000 + base + 4*s4 for `in` (index
#     offset by s6) and `coeff`, for idx = v8 and idx = v8 + vlenb;
#   - gather the 32-bit elements with vluxei64.v using base register zero
#     (the index vectors hold absolute addresses);
#   - v24 += products of the first half, v28 += products of the second half;
#   - a2 += s2 and v8 += s5, looping until a2 == s3.
.LBB0_7: # %vector.body
# Parent Loop BB0_2 Depth=1
# Parent Loop BB0_4 Depth=2
# => This Inner Loop Header: Depth=3
csrr a0, vlenb
slli a0, a0, 4
sub a0, s0, a0
addi a0, a0, -96
vs4r.v v28, (a0) # Unknown-size Folded Spill
csrr a0, vlenb
addi a1, zero, 12
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs4r.v v24, (a0) # Unknown-size Folded Spill
vsetvli zero, zero, e64, m8, ta, mu
vadd.vx v16, v8, s5
csrr a0, vlenb
slli a0, a0, 3
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v16, (a0) # Unknown-size Folded Spill
vadd.vx v16, v8, s6
csrr a0, vlenb
addi a1, zero, 56
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v24, (a0) # Unknown-size Folded Reload
vmadd.vx v16, s7, v24
vmv.v.x v0, s4
vsll.vi v0, v0, 2
vadd.vv v16, v16, v0
csrr a0, vlenb
addi a1, zero, 24
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v16, (a0) # Unknown-size Folded Spill
csrr a0, vlenb
slli a0, a0, 3
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v16, (a0) # Unknown-size Folded Reload
vadd.vx v16, v16, s6
vmadd.vx v16, s7, v24
vadd.vv v16, v16, v0
csrr a0, vlenb
addi a1, zero, 36
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v16, (a0) # Unknown-size Folded Spill
vsetvli zero, zero, e32, m4, ta, mu
csrr a0, vlenb
addi a1, zero, 24
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v16, (a0) # Unknown-size Folded Reload
vluxei64.v v24, (zero), v16
csrr a0, vlenb
addi a1, zero, 28
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs4r.v v24, (a0) # Unknown-size Folded Spill
csrr a0, vlenb
addi a1, zero, 36
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v16, (a0) # Unknown-size Folded Reload
vluxei64.v v24, (zero), v16
csrr a0, vlenb
addi a1, zero, 24
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs4r.v v24, (a0) # Unknown-size Folded Spill
vsetvli zero, zero, e64, m8, ta, mu
csrr a0, vlenb
slli a0, a0, 6
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v24, (a0) # Unknown-size Folded Reload
vmadd.vx v8, s7, v24
vadd.vv v8, v8, v0
csrr a0, vlenb
addi a1, zero, 36
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v8, (a0) # Unknown-size Folded Spill
csrr a0, vlenb
slli a0, a0, 3
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v8, (a0) # Unknown-size Folded Reload
vmv8r.v v16, v8
vmadd.vx v16, s7, v24
csrr a0, vlenb
slli a0, a0, 4
sub a0, s0, a0
addi a0, a0, -96
vl4re8.v v28, (a0) # Unknown-size Folded Reload
csrr a0, vlenb
addi a1, zero, 12
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl4re8.v v24, (a0) # Unknown-size Folded Reload
vadd.vv v16, v16, v0
csrr a0, vlenb
addi a1, zero, 48
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs8r.v v16, (a0) # Unknown-size Folded Spill
vsetvli zero, zero, e32, m4, ta, mu
csrr a0, vlenb
addi a1, zero, 36
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v0, (a0) # Unknown-size Folded Reload
vluxei64.v v16, (zero), v0
csrr a0, vlenb
addi a1, zero, 40
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vs4r.v v16, (a0) # Unknown-size Folded Spill
csrr a0, vlenb
addi a1, zero, 48
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl8re8.v v16, (a0) # Unknown-size Folded Reload
vluxei64.v v4, (zero), v16
csrr a0, vlenb
addi a1, zero, 28
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl4re8.v v16, (a0) # Unknown-size Folded Reload
csrr a0, vlenb
addi a1, zero, 40
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl4re8.v v20, (a0) # Unknown-size Folded Reload
vmacc.vv v24, v20, v16
csrr a0, vlenb
addi a1, zero, 24
mul a0, a0, a1
sub a0, s0, a0
addi a0, a0, -96
vl4re8.v v16, (a0) # Unknown-size Folded Reload
vmacc.vv v28, v4, v16
add a2, a2, s2
vsetvli zero, zero, e64, m8, ta, mu
vadd.vx v8, v8, s5
bne a2, s3, .LBB0_7
# %bb.8: # %middle.block
# in Loop: Header=BB0_4 Depth=2
# Combine the two accumulators and reduce them to the scalar s1 (reduction
# seeded with 0 in v12[0]). Skip the scalar loop when there is no remainder.
vsetvli zero, zero, e32, m4, ta, mu
vadd.vv v8, v28, v24
vsetvli a2, zero, e32, m1, ta, mu
vmv.v.i v12, 0
vsetvli a2, zero, e32, m4, ta, mu
vredsum.vs v8, v8, v12
vmv.x.s s1, v8
mv a3, s3
beqz t2, .LBB0_3
# Scalar preheader; a3 = first inner index still to process.
#   a4 = a3 - 10000 (counts up towards 0)
#   a2 = t6 + a3*40000          (coeff element pointer)
#   a3 = t5 + (a3 + s6)*40000   (in element pointer)
.LBB0_9: # %for.body6.preheader
# in Loop: Header=BB0_4 Depth=2
add a4, a3, t0
mul a2, a3, s7
add a2, a2, t6
add a3, a3, s6
mul a3, a3, s7
add a3, a3, t5
# Scalar inner loop: s1 += word(a3) * word(a2), both pointers stepping 40000 bytes.
# The bgeu compares the incremented counter with its previous value (a5), so
# the loop ends when a4 wraps from -1 to 0.
.LBB0_10: # %for.body6
# Parent Loop BB0_2 Depth=1
# Parent Loop BB0_4 Depth=2
# => This Inner Loop Header: Depth=3
lw a1, 0(a3)
lw a0, 0(a2)
mv a5, a4
mulw a0, a0, a1
addw s1, s1, a0
addi a4, a4, 1
add a2, a2, s7
add a3, a3, s7
bgeu a4, a5, .LBB0_10
j .LBB0_3
# Exit: publish the last accumulator value to `sum`, release the vector spill
# area, restore callee-saved registers and return.
.LBB0_11: # %for.end21
lui a0, %hi(sum)
sw s1, %lo(sum)(a0)
csrr a0, vlenb
addi a1, zero, 76
mul a0, a0, a1
add sp, sp, a0
ld s7, 24(sp) # 8-byte Folded Reload
ld s6, 32(sp) # 8-byte Folded Reload
ld s5, 40(sp) # 8-byte Folded Reload
ld s4, 48(sp) # 8-byte Folded Reload
ld s3, 56(sp) # 8-byte Folded Reload
ld s2, 64(sp) # 8-byte Folded Reload
ld s1, 72(sp) # 8-byte Folded Reload
ld s0, 80(sp) # 8-byte Folded Reload
ld ra, 88(sp) # 8-byte Folded Reload
addi sp, sp, 96
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type sum,@object # @sum
.section .sbss,"aw",@nobits
.globl sum
.p2align 2
sum:
.word 0 # 0x0
.size sum, 4
.type in,@object # @in
.bss
.globl in
.p2align 2
in:
.zero 400000000
.size in, 400000000
.type coeff,@object # @coeff
.globl coeff
.p2align 2
coeff:
.zero 400000000
.size coeff, 400000000
.type out,@object # @out
.globl out
.p2align 2
out:
.zero 40000
.size out, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
15. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 15): purely scalar two-level loop; no vector code was emitted.
# For each outer index t5 in [0, 10000):
#   a2 = x_in[t5]; t6 starts as the word at `a` (loaded once into a6);
#   for each of 10000 inner steps: a5 = next word of `c`, a3 = next word of
#   `a` starting at a + 4; if !(a5 < a2) then t6 = a3;
#   x_out[t5] = t6.
# On exit the last a2, t6 and a3 are stored to `x`, `curr_a` and `next_a`.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
mv t5, zero
# a6 = zero-extended 32-bit word at `a`; a7 = &x_in; t0 = &c
lui a0, %hi(a)
lwu a6, %lo(a)(a0)
lui a1, %hi(x_in)
addi a7, a1, %lo(x_in)
lui a1, %hi(c)
addi t0, a1, %lo(c)
# t3 = (2 << 12) + 1808 = 10000; t1 = &a + 4; t2 = &x_out
lui a1, 2
addiw t3, a1, 1808
addi a0, a0, %lo(a)
addi t1, a0, 4
lui a0, %hi(x_out)
addi t2, a0, %lo(x_out)
j .LBB0_2
# Outer latch: x_out[t5] = t6 (t4 = 4*t5 is the byte offset), t5++, stop at 10000.
.LBB0_1: # %for.end
# in Loop: Header=BB0_2 Depth=1
add a0, t4, t2
addi t5, t5, 1
sw t6, 0(a0)
beq t5, t3, .LBB0_6
# Outer header: load x_in[t5] into a2 and reset the inner pointers/counter.
.LBB0_2: # %for.body
# =>This Loop Header: Depth=1
# Child Loop BB0_4 Depth 2
slli t4, t5, 2
add a0, t4, a7
lw a2, 0(a0)
mv a4, t1
mv a1, t3
mv a0, t0
mv t6, a6
j .LBB0_4
# Inner latch: advance both pointers by 4 bytes, decrement the down-counter a1.
.LBB0_3: # %for.body3
# in Loop: Header=BB0_4 Depth=2
addi a0, a0, 4
addi a1, a1, -1
addi a4, a4, 4
beqz a1, .LBB0_1
# Inner body: signed compare of the `c` word against a2; when it is not less,
# take the zero-extended `a` word as the new t6.
.LBB0_4: # %for.body3
# Parent Loop BB0_2 Depth=1
# => This Inner Loop Header: Depth=2
lw a5, 0(a0)
lwu a3, 0(a4)
blt a5, a2, .LBB0_3
# %bb.5: # %for.body3
# in Loop: Header=BB0_4 Depth=2
mv t6, a3
j .LBB0_3
# Exit: write back the final values held in registers.
.LBB0_6: # %for.end13
lui a0, %hi(x)
sw a2, %lo(x)(a0)
lui a0, %hi(curr_a)
sw t6, %lo(curr_a)(a0)
lui a0, %hi(next_a)
sw a3, %lo(next_a)(a0)
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type x_in,@object # @x_in
.bss
.globl x_in
.p2align 2
x_in:
.zero 40000
.size x_in, 40000
.type x,@object # @x
.section .sbss,"aw",@nobits
.globl x
.p2align 2
x:
.word 0 # 0x0
.size x, 4
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 40000
.size a, 40000
.type curr_a,@object # @curr_a
.section .sbss,"aw",@nobits
.globl curr_a
.p2align 2
curr_a:
.word 0 # 0x0
.size curr_a, 4
.type next_a,@object # @next_a
.globl next_a
.p2align 2
next_a:
.word 0 # 0x0
.size next_a, 4
.type c,@object # @c
.bss
.globl c
.p2align 2
c:
.zero 40000
.size c, 40000
.type x_out,@object # @x_out
.globl x_out
.p2align 2
x_out:
.zero 40000
.size x_out, 40000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
16. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 16): 10000 records of three 32-bit words are read via a0 and
# three words per record are written via a4. Each output word is a
# 3-term multiply-accumulate of the input triple with one coefficient triple:
#   out[0] = in0*a5 + in1*a6 + in2*a7
#   out[1] = in0*t6 + in1*t5 + in2*t4
#   out[2] = in0*t2 + in1*t1 + in2*t0
# t0..t6 are loaded from 0..40(s0), i.e. the caller's stack area
# (presumably stack-passed arguments -- the C source is not shown here).
# A scalar loop and a gather/scatter vector loop are both emitted; the vector
# loop is used only when 10000 >= vlenb and the two 120000-byte ranges at a0
# and a4 do not overlap.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -80
sd ra, 72(sp) # 8-byte Folded Spill
sd s0, 64(sp) # 8-byte Folded Spill
sd s1, 56(sp) # 8-byte Folded Spill
sd s2, 48(sp) # 8-byte Folded Spill
sd s3, 40(sp) # 8-byte Folded Spill
sd s4, 32(sp) # 8-byte Folded Spill
sd s5, 24(sp) # 8-byte Folded Spill
sd s6, 16(sp) # 8-byte Folded Spill
addi s0, sp, 80
# Reserve 24*vlenb bytes for vector spills.
csrr a1, vlenb
addi a2, zero, 24
mul a1, a1, a2
sub sp, sp, a1
ld t0, 40(s0)
ld t1, 32(s0)
ld t2, 24(s0)
ld t4, 16(s0)
ld t5, 8(s0)
ld t6, 0(s0)
csrr s3, vlenb
# a1 = 10000; fall back to scalar code if that is below vlenb.
lui t3, 2
addiw a1, t3, 1808
bltu a1, s3, .LBB0_2
# %bb.1: # %vector.memcheck
# a1 = (29 << 12) + 1216 = 120000 = 10000 * 12 bytes. Overlap test:
# (a4 < a0 + 120000) && (a0 < a4 + 120000); no overlap -> vector path.
lui a1, 29
addiw a1, a1, 1216
add a2, a4, a1
add a1, a1, a0
sltu a1, a4, a1
sltu a2, a0, a2
and a2, a2, a1
beqz a2, .LBB0_6
# Scalar entry from the start: s5 = 0 (record index), s6 = input pointer,
# a3 = output pointer.
.LBB0_2:
mv s5, zero
mv s6, a0
mv a3, a4
# t3 = 8192 + 1807 = 9999, the last record index.
.LBB0_3: # %for.body.preheader
addiw t3, t3, 1807
# Scalar loop: one 12-byte record per iteration; continues while the
# pre-increment index (a0) is below 9999 (unsigned).
.LBB0_4: # %for.body
# =>This Inner Loop Header: Depth=1
lw a4, 0(s6)
lw s1, 4(s6)
lw a0, 8(s6)
mulw a1, a4, a5
mulw a2, s1, a6
addw a1, a1, a2
mulw a2, a0, a7
addw a1, a1, a2
sw a1, 0(a3)
mulw a1, a4, t6
mulw a2, s1, t5
addw a1, a1, a2
mulw a2, a0, t4
addw a1, a1, a2
sw a1, 4(a3)
mulw a1, a4, t2
mulw a2, s1, t1
addw a1, a1, a2
mulw a0, a0, t0
addw a0, a0, a1
sw a0, 8(a3)
sext.w a0, s5
addiw s5, s5, 1
addi s6, s6, 12
addi a3, a3, 12
bltu a0, t3, .LBB0_4
# Common exit: release the vector spill area and restore callee-saved registers.
.LBB0_5: # %for.end
csrr a0, vlenb
addi a1, zero, 24
mul a0, a0, a1
add sp, sp, a0
ld s6, 16(sp) # 8-byte Folded Reload
ld s5, 24(sp) # 8-byte Folded Reload
ld s4, 32(sp) # 8-byte Folded Reload
ld s3, 40(sp) # 8-byte Folded Reload
ld s2, 48(sp) # 8-byte Folded Reload
ld s1, 56(sp) # 8-byte Folded Reload
ld s0, 64(sp) # 8-byte Folded Reload
ld ra, 72(sp) # 8-byte Folded Reload
addi sp, sp, 80
ret
# Vector preheader:
#   s2 = 10000 % vlenb (remainder), s5 = 10000 - s2 (records done by vectors)
#   s6 / a3 = input / output pointers where the scalar remainder would resume
#   s4 = vlenb * 12 (bytes advanced per vector iteration)
#   v8 = vid * 3 << 2 = per-lane byte offsets 12*i, spilled to slot 24.
.LBB0_6: # %vector.ph
mv s1, zero
lui a1, 2
addiw a1, a1, 1808
remu s2, a1, s3
sub s5, a1, s2
addi s4, zero, 12
mul a3, s5, s4
vsetvli a2, zero, e64, m8, ta, mu
vid.v v8
addi a2, zero, 3
vmul.vx v8, v8, a2
add s6, a0, a3
add a3, a3, a4
mul s4, s3, s4
vsll.vi v8, v8, 2
csrr a1, vlenb
addi a2, zero, 24
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v8, (a1) # Unknown-size Folded Spill
# Vector body: the three input words of each record are fetched with indexed
# gathers (vluxei64.v) into v24/v28/v12, combined with vmul/vmacc exactly as in
# the scalar loop, and written back with ordered indexed scatters (vsoxei64.v).
# Address vectors for the +4/+8 fields are built at e64 and partly spilled.
.LBB0_7: # %vector.body
# =>This Inner Loop Header: Depth=1
vsetvli zero, zero, e64, m8, ta, mu
csrr a1, vlenb
addi a2, zero, 24
mul a1, a1, a2
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v16, (a1) # Unknown-size Folded Reload
vadd.vx v0, v16, a0
vadd.vx v8, v16, a4
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v8, (a1) # Unknown-size Folded Spill
vadd.vi v8, v0, 4
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v24, (a0), v16
vsetvli zero, zero, e64, m8, ta, mu
vadd.vi v0, v0, 8
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v28, (zero), v8
vluxei64.v v12, (zero), v0
vmul.vx v8, v28, a6
vmacc.vx v8, a5, v24
vmacc.vx v8, a7, v12
vsetvli zero, zero, e64, m8, ta, mu
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v0, (a1) # Unknown-size Folded Reload
vadd.vi v0, v0, 4
csrr a1, vlenb
slli a1, a1, 4
sub a1, s0, a1
addi a1, a1, -80
vs8r.v v0, (a1) # Unknown-size Folded Spill
vsetvli zero, zero, e32, m4, ta, mu
vsoxei64.v v8, (a4), v16
vmul.vx v8, v28, t5
vmacc.vx v8, t6, v24
vmacc.vx v8, t4, v12
vsetvli zero, zero, e64, m8, ta, mu
csrr a1, vlenb
slli a1, a1, 3
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v16, (a1) # Unknown-size Folded Reload
vadd.vi v0, v16, 8
vsetvli zero, zero, e32, m4, ta, mu
csrr a1, vlenb
slli a1, a1, 4
sub a1, s0, a1
addi a1, a1, -80
vl8re8.v v16, (a1) # Unknown-size Folded Reload
vsoxei64.v v8, (zero), v16
vmul.vx v8, v28, t1
vmacc.vx v8, t2, v24
vmacc.vx v8, t0, v12
vsoxei64.v v8, (zero), v0
# Advance: s1 += vlenb records, both base pointers += vlenb*12 bytes.
add s1, s1, s3
add a0, a0, s4
add a4, a4, s4
bne s1, s5, .LBB0_7
# %bb.8: # %middle.block
# Finish the remainder in the scalar loop (s5, s6, a3 already point past the
# vectorised part) or return if there is none.
bnez s2, .LBB0_3
j .LBB0_5
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
17. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 17): copies two 64-bit elements (16 bytes) from `in` to `out`
# with a single fixed-length vector load/store pair (vl = 2, e64, m1).
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
lui a0, %hi(in)
addi a0, a0, %lo(in)
# vl is fixed at 2 by the immediate form of vsetivli.
vsetivli zero, 2, e64, m1, ta, mu
vle64.v v8, (a0)
lui a0, %hi(out)
addi a0, a0, %lo(out)
vse64.v v8, (a0)
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type in,@object # @in
.bss
.globl in
.p2align 3
in:
.zero 80000
.size in, 80000
.type out,@object # @out
.globl out
.p2align 3
out:
.zero 80000
.size out, 80000
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
18. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 18): walks 64 pairs of 32-bit words in `a` (8 bytes per pair),
# adding the first word of each pair to sum1 and the second to sum2.
# a1 / a0 carry the running sum1 / sum2 and are stored back at the end.
# The vector path is taken when 64 >= vlenb; it uses indexed gathers with byte
# offsets 8*i and 8*i + 4, and the scalar loop handles any remainder.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
lui a0, %hi(sum1)
lwu a1, %lo(sum1)(a0)
lui a0, %hi(sum2)
lwu a0, %lo(sum2)(a0)
csrr a3, vlenb
addi a2, zero, 64
bgeu a2, a3, .LBB0_2
# %bb.1:
# Scalar-only path: start at pair index 0.
mv a2, zero
j .LBB0_5
# Vector preheader:
#   a6 = 64 % vlenb (remainder), a2 = 64 - a6 (pairs handled by vectors)
#   v16 = vid (pair indices, e64/m8)
#   v8 / v12 = zero vectors whose element 0 is seeded with sum1 / sum2
.LBB0_2: # %vector.ph
mv a5, zero
remu a6, a2, a3
sub a2, a2, a6
vsetvli a4, zero, e64, m8, ta, mu
vid.v v16
vsetvli zero, zero, e32, m4, ta, mu
vmv.v.i v12, 0
vsetvli zero, zero, e32, m4, tu, mu
vmv4r.v v8, v12
vmv.s.x v8, a1
vmv.s.x v12, a0
lui a0, %hi(a)
addi a0, a0, %lo(a)
# Vector body: v24 = index*8 gathers the first words (accumulated in v8);
# v24 | 4 gathers the second words (accumulated in v12). The index vector and
# the counter a5 both advance by vlenb per iteration.
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
vsetvli zero, zero, e64, m8, ta, mu
vsll.vi v24, v16, 3
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v0, (a0), v24
vadd.vv v8, v8, v0
vsetvli zero, zero, e64, m8, ta, mu
vor.vi v24, v24, 4
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v0, (a0), v24
vadd.vv v12, v12, v0
add a5, a5, a3
vsetvli zero, zero, e64, m8, ta, mu
vadd.vx v16, v16, a3
bne a5, a2, .LBB0_3
# %bb.4: # %middle.block
# Horizontal reductions (seeded with 0 in v16[0]) back into a0 / a1.
vsetvli a0, zero, e32, m1, ta, mu
vmv.v.i v16, 0
vsetvli a0, zero, e32, m4, ta, mu
vredsum.vs v12, v12, v16
vmv.x.s a0, v12
vredsum.vs v8, v8, v16
vmv.x.s a1, v8
beqz a6, .LBB0_7
# Scalar preheader: a3 = a2 - 64 (counts up towards 0);
# a2 = &a + a2*8 + 4, i.e. it points at the second word of the next pair.
.LBB0_5: # %for.body.preheader
addi a3, a2, -64
slli a2, a2, 3
lui a4, %hi(a)
addi a4, a4, %lo(a)
add a2, a2, a4
addi a2, a2, 4
# Scalar loop: the bgeu compares the incremented counter with its previous
# value (a4), so it exits when a3 wraps from -1 to 0.
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
lw a4, -4(a2)
lw a5, 0(a2)
addw a1, a1, a4
mv a4, a3
addw a0, a0, a5
addi a3, a3, 1
addi a2, a2, 8
bgeu a3, a4, .LBB0_6
.LBB0_7: # %for.end
lui a2, %hi(sum1)
sw a1, %lo(sum1)(a2)
lui a1, %hi(sum2)
sw a0, %lo(sum2)(a1)
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 512
.size a, 512
.type sum1,@object # @sum1
.section .sbss,"aw",@nobits
.globl sum1
.p2align 2
sum1:
.word 0 # 0x0
.size sum1, 4
.type sum2,@object # @sum2
.globl sum2
.p2align 2
sum2:
.word 0 # 0x0
.size sum2, 4
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
19. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 19): adds both 32-bit words of each of 64 eight-byte pairs in
# `a` into the single running total `sum` (held in a0).
# Same shape as a two-gather reduction: vector path when 64 >= vlenb, with
# byte offsets 8*i and 8*i + 4 feeding one accumulator (v16); the scalar loop
# covers the remainder or the whole range.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
lui a0, %hi(sum)
lwu a0, %lo(sum)(a0)
csrr a2, vlenb
addi a1, zero, 64
bgeu a1, a2, .LBB0_2
# %bb.1:
mv a1, zero
j .LBB0_5
# Vector preheader: a3 = 64 % vlenb, a1 = 64 - a3, v8 = vid (e64/m8),
# v16 = zero accumulator with element 0 seeded from `sum`.
.LBB0_2: # %vector.ph
mv a4, zero
remu a3, a1, a2
sub a1, a1, a3
vsetvli a5, zero, e64, m8, ta, mu
vid.v v8
vsetvli zero, zero, e32, m4, ta, mu
vmv.v.i v16, 0
vsetvli zero, zero, e32, m4, tu, mu
vmv.s.x v16, a0
lui a0, %hi(a)
addi a0, a0, %lo(a)
# Vector body: gather first words, add; gather second words, add; then advance
# the index vector and the counter a4 by vlenb.
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
vsetvli zero, zero, e64, m8, ta, mu
vsll.vi v24, v8, 3
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v20, (a0), v24
vadd.vv v16, v16, v20
vsetvli zero, zero, e64, m8, ta, mu
vor.vi v24, v24, 4
vsetvli zero, zero, e32, m4, ta, mu
vluxei64.v v20, (a0), v24
vadd.vv v16, v20, v16
add a4, a4, a2
vsetvli zero, zero, e64, m8, ta, mu
vadd.vx v8, v8, a2
bne a4, a1, .LBB0_3
# %bb.4: # %middle.block
# Reduce the accumulator to a scalar in a0 (reduction seeded with 0).
vsetvli a0, zero, e32, m1, ta, mu
vmv.v.i v8, 0
vsetvli a0, zero, e32, m4, ta, mu
vredsum.vs v8, v16, v8
vmv.x.s a0, v8
beqz a3, .LBB0_7
# Scalar preheader: a2 = a1 - 64 (counts up towards 0);
# a1 = &a + a1*8 + 4 (second word of the next pair).
.LBB0_5: # %for.body.preheader
addi a2, a1, -64
slli a1, a1, 3
lui a3, %hi(a)
addi a3, a3, %lo(a)
add a1, a1, a3
addi a1, a1, 4
# Scalar loop: exits when the counter a2 wraps from -1 to 0 (bgeu against
# its pre-increment copy in a3).
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
lw a3, -4(a1)
lw a4, 0(a1)
addw a0, a0, a3
mv a3, a2
addw a0, a0, a4
addi a2, a2, 1
addi a1, a1, 8
bgeu a2, a3, .LBB0_6
.LBB0_7: # %for.end
lui a1, %hi(sum)
sw a0, %lo(sum)(a1)
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 512
.size a, 512
.type sum,@object # @sum
.section .sbss,"aw",@nobits
.globl sum
.p2align 2
sum:
.word 0 # 0x0
.size sum, 4
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
20. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 20): register arguments as used below:
#   a0 = pointer to 32-bit words (read-modify-write, 8 per iteration)
#   a1 = pointer to 16-bit values
#   a2 = iteration count (signed; nothing is done when a2 <= 0)
#   a3 = stride in elements (scaled by 4 for a0 and by 2 for a1)
#   a4, a5 = scalar multipliers
# Per iteration: w[0..7] += a4 * sext(h[0..7]) + a5 * sext(h[1..8]),
# where h is the halfword window at a1. The shifted window h[1..8] is
# assembled element by element in a 32-byte stack buffer at s0-64.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -64
sd ra, 56(sp) # 8-byte Folded Spill
sd s0, 48(sp) # 8-byte Folded Spill
addi s0, sp, 64
blez a2, .LBB0_3
# %bb.1: # %for.body.preheader
# a6 = loop counter, a7 / t6 = byte strides for a0 / a1. a1 is biased by +16
# so the loop addresses the window as -16(a1) .. 0(a1).
# t0..t5 = addresses of buffer words 6..1 (s0-40 .. s0-60).
mv a6, zero
slli a7, a3, 2
addi a1, a1, 16
slli t6, a3, 1
addi t0, s0, -40
addi t1, s0, -44
addi t2, s0, -48
addi t3, s0, -52
addi t4, s0, -56
addi t5, s0, -60
.LBB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
# v9 = the 8 halfwords h[0..7] sign-extended to 32 bits; a3 = h[8].
addi a3, a1, -16
vsetivli zero, 8, e16, mf2, ta, mu
vle16.v v8, (a3)
vsetvli zero, zero, e32, m1, ta, mu
vsext.vf2 v9, v8
lh a3, 0(a1)
# With vl = 1, slide each of elements 1..7 down to lane 0 and store it as a
# single word; together with h[8] this forms the shifted window in memory.
vsetivli zero, 1, e32, m1, ta, mu
vslidedown.vi v8, v9, 1
vslidedown.vi v10, v9, 2
vslidedown.vi v11, v9, 3
vslidedown.vi v12, v9, 4
vslidedown.vi v13, v9, 5
vslidedown.vi v14, v9, 6
vslidedown.vi v15, v9, 7
vse32.v v15, (t0)
vse32.v v14, (t1)
vse32.v v13, (t2)
vse32.v v12, (t3)
vse32.v v11, (t4)
vse32.v v10, (t5)
sw a3, -36(s0)
addi a3, s0, -64
vse32.v v8, (a3)
# Back to vl = 8: load the destination words and the rebuilt window, apply the
# two multiply-accumulates, and store the result.
vsetivli zero, 8, e32, m1, ta, mu
vle32.v v8, (a0)
addi a3, s0, -64
vle32.v v10, (a3)
vmacc.vx v8, a4, v9
vmacc.vx v8, a5, v10
vse32.v v8, (a0)
addiw a6, a6, 1
add a0, a0, a7
add a1, a1, t6
blt a6, a2, .LBB0_2
.LBB0_3: # %for.end
ld s0, 48(sp) # 8-byte Folded Reload
ld ra, 56(sp) # 8-byte Folded Reload
addi sp, sp, 64
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
21. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 21): returns in a0 the 32-bit sum of the words at a0[n-1],
# a0[n-2], ... walking downwards, where n = a1 (signed 32-bit). Returns 0
# when n <= 0.
# The vector path (taken when n >= 2*vlenb and the SCEV overflow check passes)
# loads two whole register groups per iteration from descending addresses and
# reverses their lanes with vrgather before accumulating.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
blez a1, .LBB0_6
# %bb.1: # %for.body.preheader
# a3 = n zero-extended to 64 bits; a7 = vlenb; t0 = 2*vlenb (vector step).
slli a2, a1, 32
srli a3, a2, 32
csrr a7, vlenb
slli t0, a7, 1
bltu a3, t0, .LBB0_3
# %bb.2: # %vector.scevcheck
# Runtime check emitted by the vectorizer: take the scalar path if
# (int)(n-1) compares unsigned-below itself computed from the extended value,
# or if (n-1) has any bit set above bit 31.
addi a2, a3, -1
addiw a4, a1, -1
addiw a5, a3, -1
sltu a4, a4, a5
srli a2, a2, 32
snez a2, a2
or a4, a4, a2
beqz a4, .LBB0_8
# Scalar entry from the start: a2 = 0 (sum), a6 = remaining count, a3 = index.
.LBB0_3:
mv a2, zero
mv a6, a3
mv a3, a1
.LBB0_4: # %for.body.preheader15
addi a1, a6, 1
addi a4, zero, 1
# Scalar loop: pre-decrement the 32-bit index a3, load a0[a3] (index
# zero-extended and scaled by 4 via the slli/srli pair), add it to a2.
# Runs while the down-counter a1 is still above 1.
.LBB0_5: # %for.body
# =>This Inner Loop Header: Depth=1
addiw a3, a3, -1
slli a5, a3, 32
srli a5, a5, 30
add a5, a5, a0
lw a5, 0(a5)
addi a1, a1, -1
addw a2, a2, a5
blt a4, a1, .LBB0_5
j .LBB0_7
.LBB0_6:
mv a2, zero
.LBB0_7: # %for.end
mv a0, a2
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
# Vector preheader:
#   a6 = n % (2*vlenb), t1 = n - a6 (elements handled by the vector loop)
#   a3 = n - t1 (32-bit): index at which the scalar remainder resumes
#   t3 = (1 - vlenb) * 4 and a7 = -vlenb * 4: byte offsets from the current
#        top element down to the start of the first / second register group
#   v8, v12 = zero accumulators
#   v16[i] = (vlenb - 1) - i: lane-reversal indices for vrgather
.LBB0_8: # %vector.ph
mv a2, zero
remu a6, a3, t0
sub t1, a3, a6
subw a3, a1, t1
addi a4, zero, 1
sub a4, a4, a7
neg t2, a7
vsetvli a5, zero, e32, m4, ta, mu
vmv.v.i v8, 0
vsetvli zero, zero, e32, m4, tu, mu
vmv4r.v v12, v8
vmv.s.x v12, zero
slli t3, a4, 2
addi a5, a7, -1
vsetvli zero, zero, e32, m4, ta, mu
vid.v v16
vrsub.vx v16, v16, a5
slli a7, t2, 2
# Vector body: a5 = &a0[n - 1 - a2] (~a2 + n == n - 1 - a2 in 32 bits).
# Two m4 groups are loaded from below that address, lane-reversed, and added
# into v12 and v8 respectively; a2 advances by 2*vlenb.
.LBB0_9: # %vector.body
# =>This Inner Loop Header: Depth=1
not a5, a2
addw a5, a5, a1
slli a5, a5, 32
srli a5, a5, 30
add a5, a5, a0
add a4, a5, t3
vl4re32.v v20, (a4)
add a4, a5, a7
add a4, a4, t3
vl4re32.v v24, (a4)
vrgather.vv v28, v20, v16
vrgather.vv v20, v24, v16
vadd.vv v12, v28, v12
add a2, a2, t0
vadd.vv v8, v20, v8
bne a2, t1, .LBB0_9
# %bb.10: # %middle.block
# Merge the accumulators, reduce to the scalar sum in a2, then either finish
# the remainder in the scalar loop or return.
vadd.vv v8, v8, v12
vsetvli a1, zero, e32, m1, ta, mu
vmv.v.i v12, 0
vsetvli a1, zero, e32, m4, ta, mu
vredsum.vs v8, v8, v12
vmv.x.s a2, v8
bnez a6, .LBB0_4
j .LBB0_7
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
22. (S)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 22): element-wise 32-bit multiply, a0[i] = a2[i] * a1[i] for
# i in [0, n), with n = a3 treated as a signed 32-bit count (no work if <= 0).
# The vector loop (two m4 register groups per iteration) is used when
# n >= 2*vlenb and the destination range does not overlap either source;
# otherwise, and for the remainder, the scalar loop runs.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
blez a3, .LBB0_9
# %bb.1: # %for.body.preheader
# a6 = n zero-extended; t0 = vlenb; t1 = 2*vlenb (elements per vector iteration).
slli a3, a3, 32
srli a6, a3, 32
csrr t0, vlenb
slli t1, t0, 1
bgeu a6, t1, .LBB0_3
# %bb.2:
# Too few elements: scalar loop from index t5 = 0.
mv t5, zero
j .LBB0_7
# Runtime alias check over n*4 bytes: the scalar path is forced when the a0
# range overlaps the a1 range or the a2 range.
.LBB0_3: # %vector.memcheck
slli a3, a6, 2
add a4, a0, a3
add a5, a1, a3
add a7, a2, a3
sltu a5, a0, a5
sltu a3, a1, a4
and a3, a3, a5
sltu a5, a0, a7
sltu a4, a2, a4
and a4, a4, a5
or a3, a3, a4
mv t5, zero
bnez a3, .LBB0_7
# %bb.4: # %vector.ph
# a3 = byte offset, a4 = element count done, a7 = n % t1, t5 = n - a7.
# t2 / t3 / t4 = a0 / a2 / a1 advanced by 4*vlenb bytes (second register
# group); t0 is reused as the per-iteration byte step 8*vlenb.
mv a3, zero
mv a4, zero
remu a7, a6, t1
sub t5, a6, a7
slli t4, t0, 2
add t2, a0, t4
slli t0, t0, 3
add t3, a2, t4
add t4, t4, a1
# Vector body: whole-register loads of both halves of each source, two
# multiplies, whole-register stores of both halves of the destination.
.LBB0_5: # %vector.body
# =>This Inner Loop Header: Depth=1
add a5, a1, a3
vl4re32.v v8, (a5)
add a5, t4, a3
vl4re32.v v12, (a5)
add a5, a2, a3
vl4re32.v v16, (a5)
add a5, t3, a3
vl4re32.v v20, (a5)
vsetvli a5, zero, e32, m4, ta, mu
vmul.vv v8, v16, v8
vmul.vv v12, v20, v12
add a5, a0, a3
vs4r.v v8, (a5)
add a5, t2, a3
vs4r.v v12, (a5)
add a4, a4, t1
add a3, a3, t0
bne a4, t5, .LBB0_5
# %bb.6: # %middle.block
beqz a7, .LBB0_9
# Scalar preheader: a3 = elements left (n - t5); all three pointers are moved
# to element t5.
.LBB0_7: # %for.body.preheader23
sub a3, a6, t5
slli a4, t5, 2
add a0, a0, a4
add a2, a2, a4
add a1, a1, a4
# Scalar loop: one element per iteration, counting a3 down to zero.
.LBB0_8: # %for.body
# =>This Inner Loop Header: Depth=1
lw a4, 0(a1)
lw a5, 0(a2)
mulw a4, a5, a4
sw a4, 0(a0)
addi a3, a3, -1
addi a0, a0, 4
addi a2, a2, 4
addi a1, a1, 4
bnez a3, .LBB0_8
.LBB0_9: # %for.end
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
23. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 23): for 256 elements, reads an unsigned 16-bit value via a0,
# shifts it left by 7 and stores the 32-bit result via a1.
# The vector loop (4*vlenb elements per iteration, widening e16 -> e32 at
# LMUL 8) is used when 256 >= 4*vlenb and the 512-byte source range does not
# overlap the 1024-byte destination range.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# a6 = vlenb; t0 = 4*vlenb (elements per vector iteration).
csrr a6, vlenb
slli t0, a6, 2
addi a2, zero, 256
bltu a2, t0, .LBB0_2
# %bb.1: # %vector.memcheck
# Overlap test: (a1 < a0 + 512) && (a0 < a1 + 1024); no overlap -> vector path.
addi a2, a1, 1024
addi a3, a0, 512
sltu a3, a1, a3
sltu a2, a0, a2
and a3, a3, a2
beqz a3, .LBB0_6
# Scalar entry from the start: a2 = index, a3 = destination, a5 = source.
.LBB0_2:
mv a2, zero
mv a3, a1
mv a5, a0
.LBB0_3: # %for.body.preheader
addi a6, zero, 255
# Scalar loop: continues while the pre-increment index (a0) is below 255.
.LBB0_4: # %for.body
# =>This Inner Loop Header: Depth=1
lhu a1, 0(a5)
addi a5, a5, 2
slliw a1, a1, 7
addi a4, a3, 4
sext.w a0, a2
addiw a2, a2, 1
sw a1, 0(a3)
mv a3, a4
bltu a0, a6, .LBB0_4
.LBB0_5: # %for.end
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
# Vector preheader:
#   a7 = 256 % t0 (remainder), a2 = 256 - a7 (elements done by vectors)
#   a3 / a5 = destination / source pointers where the scalar loop resumes
#   t1 = 8*vlenb (source bytes per iteration, and offset of the second store)
#   a6 = 16*vlenb (destination bytes per iteration)
.LBB0_6: # %vector.ph
mv t2, zero
addi a2, zero, 256
remu a7, a2, t0
sub a2, a2, a7
slli a3, a2, 2
add a3, a3, a1
slli a5, a2, 1
add a5, a5, a0
slli t1, a6, 3
slli a6, a6, 4
# Vector body: two m4 groups of halfwords are zero-extended to m8 groups of
# words, shifted left by 7 and stored with whole-register stores.
.LBB0_7: # %vector.body
# =>This Inner Loop Header: Depth=1
vl4re16.v v8, (a0)
add a4, a0, t0
vl4re16.v v12, (a4)
vsetvli a4, zero, e32, m8, ta, mu
vzext.vf2 v16, v8
vzext.vf2 v24, v12
vsll.vi v8, v16, 7
vsll.vi v16, v24, 7
vs8r.v v8, (a1)
add a4, a1, t1
vs8r.v v16, (a4)
add t2, t2, t0
add a0, a0, t1
add a1, a1, a6
bne t2, a2, .LBB0_7
# %bb.8: # %middle.block
bnez a7, .LBB0_3
j .LBB0_5
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
24. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 24): for 1024 elements, compares the floats a[i] < b[i] and
# stores a 32-bit word to c[i]: the value in a0 when the compare is true,
# the value in a1 otherwise.
# The vector path (when 1024 >= 2*vlenb) performs the select at e16 and then
# sign-extends to e32 before storing; the scalar loop stores a0/a1 directly.
# NOTE(review): the e16 select implies the selected values are 16-bit in the
# source program -- confirm against the C input, which is not shown here.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
# t2 = vlenb; a7 = 2*vlenb (elements per vector iteration).
csrr t2, vlenb
slli a7, t2, 1
addi a2, zero, 1024
bgeu a2, a7, .LBB0_2
# %bb.1:
mv t0, zero
j .LBB0_5
# Vector preheader:
#   a5 = byte offset, a4 = elements done
#   a6 = 1024 % a7 (remainder), t0 = 1024 - a6
#   v10 = splat(a1) at e16
#   t3 = &c, t5 = &b, a2 = &a; t1 / t4 / t6 = the same bases + 4*vlenb bytes
#   t2 = 8*vlenb (bytes advanced per iteration)
.LBB0_2: # %vector.ph
mv a5, zero
mv a4, zero
remu a6, a2, a7
sub t0, a2, a6
vsetvli a2, zero, e16, m2, ta, mu
vmv.v.x v10, a1
lui a2, %hi(c)
addi t3, a2, %lo(c)
slli a3, t2, 2
add t1, a3, t3
slli t2, t2, 3
lui a2, %hi(b)
addi t5, a2, %lo(b)
add t4, a3, t5
lui a2, %hi(a)
addi a2, a2, %lo(a)
add t6, a3, a2
# Vector body: two groups each of a and b are loaded; vmflt builds the
# a < b masks; vmerge.vxm picks a0 where the mask is set and v10 (a1)
# elsewhere; results are widened to 32 bits and stored to c.
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
add a3, a5, a2
vl4re32.v v12, (a3)
add a3, t6, a5
vl4re32.v v16, (a3)
add a3, a5, t5
vl4re32.v v20, (a3)
add a3, t4, a5
vl4re32.v v24, (a3)
vsetvli zero, zero, e32, m4, ta, mu
vmflt.vv v0, v12, v20
vmflt.vv v8, v16, v24
vsetvli zero, zero, e16, m2, ta, mu
vmerge.vxm v12, v10, a0, v0
vmv1r.v v0, v8
vmerge.vxm v8, v10, a0, v0
vsetvli zero, zero, e32, m4, ta, mu
vsext.vf2 v16, v12
vsext.vf2 v12, v8
add a3, a5, t3
vs4r.v v16, (a3)
add a3, t1, a5
vs4r.v v12, (a3)
add a4, a4, a7
add a5, a5, t2
bne a4, t0, .LBB0_3
# %bb.4: # %middle.block
beqz a6, .LBB0_9
# Scalar preheader: a4 = t0 - 1024 (counts up towards 0); a2 / a3 / a5 point
# at element t0 of c / b / a.
.LBB0_5: # %for.body.preheader
addi a4, t0, -1024
lui a2, %hi(c)
addi a2, a2, %lo(c)
slli a6, t0, 2
add a2, a2, a6
lui a3, %hi(b)
addi a3, a3, %lo(b)
add a3, a3, a6
lui a5, %hi(a)
addi a5, a5, %lo(a)
add a5, a5, a6
j .LBB0_7
# Loop tail: store the selected value (temporarily held in a4), restore the
# counter as a6 + 1 and exit once it wraps below its previous value.
.LBB0_6: # %for.body
# in Loop: Header=BB0_7 Depth=1
sw a4, 0(a2)
addi a4, a6, 1
addi a2, a2, 4
addi a3, a3, 4
addi a5, a5, 4
bltu a4, a6, .LBB0_9
# Loop head: a7 = (a[i] < b[i]); the counter is parked in a6 while a4 carries
# the value to store (a0 if the compare is true, a1 otherwise).
.LBB0_7: # %for.body
# =>This Inner Loop Header: Depth=1
flw ft0, 0(a5)
flw ft1, 0(a3)
flt.s a7, ft0, ft1
mv a6, a4
mv a4, a0
bnez a7, .LBB0_6
# %bb.8: # %for.body
# in Loop: Header=BB0_7 Depth=1
mv a4, a1
j .LBB0_6
.LBB0_9: # %for.end
ld s0, 0(sp) # 8-byte Folded Reload
ld ra, 8(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 4096
.size a, 4096
.type b,@object # @b
.globl b
.p2align 2
b:
.zero 4096
.size b, 4096
.type c,@object # @c
.globl c
.p2align 2
c:
.zero 4096
.size c, 4096
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```
25. (D)
```c=
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_v0p10"
.option nopic
.file "basicLoop.c"
# foo (listing 25): for 1024 elements stores to j[i] the 32-bit value
# (a[i] < b[i]) & (c[i] < d[i]), where a, b, c, d are compared as floats.
# The vector path (when 1024 >= 2*vlenb) processes two m4 groups per
# iteration and needs 12*vlenb bytes of stack for vector spills; the scalar
# loop covers the remainder or the whole range.
.globl foo # -- Begin function foo
.p2align 1
.type foo,@function
foo: # @foo
# %bb.0: # %entry
addi sp, sp, -48
sd ra, 40(sp) # 8-byte Folded Spill
sd s0, 32(sp) # 8-byte Folded Spill
sd s1, 24(sp) # 8-byte Folded Spill
sd s2, 16(sp) # 8-byte Folded Spill
addi s0, sp, 48
# Reserve 12*vlenb bytes below the scalar frame for vector spills.
csrr a0, vlenb
addi a1, zero, 12
mul a0, a0, a1
sub sp, sp, a0
# a0 = vlenb; a7 = 2*vlenb (elements per vector iteration).
csrr a0, vlenb
slli a7, a0, 1
addi a1, zero, 1024
bgeu a1, a7, .LBB0_2
# %bb.1:
mv t0, zero
j .LBB0_5
# Vector preheader:
#   a3 = byte offset, a4 = elements done
#   a6 = 1024 % a7 (remainder), t0 = 1024 - a6
#   t3 = &j, t5 = &d, s2 = &c, a2 = &b, a5 = &a
#   t1 / t4 / t6 / a1 / s1 = the same bases + 4*vlenb bytes (second group)
#   t2 = 8*vlenb (bytes advanced per iteration)
# An all-zero e32/m4 vector is spilled to slot 12; ra is used as a scratch
# register for the multiplier here (its value was saved in the prologue).
.LBB0_2: # %vector.ph
mv a3, zero
mv a4, zero
remu a6, a1, a7
sub t0, a1, a6
lui a1, %hi(j)
addi t3, a1, %lo(j)
slli s1, a0, 2
add t1, s1, t3
slli t2, a0, 3
lui a0, %hi(d)
addi t5, a0, %lo(d)
add t4, s1, t5
lui a0, %hi(c)
addi s2, a0, %lo(c)
add t6, s1, s2
lui a1, %hi(b)
addi a2, a1, %lo(b)
add a1, s1, a2
lui a5, %hi(a)
addi a5, a5, %lo(a)
add s1, s1, a5
vsetvli a0, zero, e32, m4, ta, mu
vmv.v.i v8, 0
csrr a0, vlenb
addi ra, zero, 12
mul a0, a0, ra
sub a0, s0, a0
addi a0, a0, -48
vs4r.v v8, (a0) # Unknown-size Folded Spill
# Vector body: both groups of a are loaded and immediately spilled (slots 4
# and 8), then b, c and d are loaded. Four vmflt compares produce masks
# v8..v11, vmand combines a<b with c<d per group, and vmerge.vim writes 1 into
# the reloaded zero vector where the mask is set before the stores to j.
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
add a0, a3, a5
vl4re32.v v8, (a0)
csrr a0, vlenb
slli a0, a0, 2
sub a0, s0, a0
addi a0, a0, -48
vs4r.v v8, (a0) # Unknown-size Folded Spill
add a0, s1, a3
vl4re32.v v8, (a0)
csrr a0, vlenb
slli a0, a0, 3
sub a0, s0, a0
addi a0, a0, -48
vs4r.v v8, (a0) # Unknown-size Folded Spill
add a0, a3, a2
vl4re32.v v20, (a0)
add a0, a1, a3
vl4re32.v v24, (a0)
add a0, a3, s2
vl4re32.v v28, (a0)
add a0, t6, a3
vl4re32.v v0, (a0)
add a0, a3, t5
vl4re32.v v4, (a0)
add a0, t4, a3
vl4re32.v v12, (a0)
csrr a0, vlenb
slli a0, a0, 2
sub a0, s0, a0
addi a0, a0, -48
vl4re8.v v16, (a0) # Unknown-size Folded Reload
vmflt.vv v8, v16, v20
csrr a0, vlenb
slli a0, a0, 3
sub a0, s0, a0
addi a0, a0, -48
vl4re8.v v16, (a0) # Unknown-size Folded Reload
vmflt.vv v9, v16, v24
vmflt.vv v10, v28, v4
vmflt.vv v11, v0, v12
vmand.mm v0, v8, v10
vmand.mm v8, v9, v11
csrr a0, vlenb
addi ra, zero, 12
mul a0, a0, ra
sub a0, s0, a0
addi a0, a0, -48
vl4re8.v v16, (a0) # Unknown-size Folded Reload
vmerge.vim v12, v16, 1, v0
vmv1r.v v0, v8
vmerge.vim v8, v16, 1, v0
add a0, a3, t3
vs4r.v v12, (a0)
add a0, t1, a3
vs4r.v v8, (a0)
add a4, a4, a7
add a3, a3, t2
bne a4, t0, .LBB0_3
# %bb.4: # %middle.block
beqz a6, .LBB0_7
# Scalar preheader: a6 = t0 - 1024 (counts up towards 0); a1 / a2 / a3 / a4 /
# a5 point at element t0 of j / d / c / b / a.
.LBB0_5: # %for.body.preheader
addi a6, t0, -1024
lui a1, %hi(j)
addi a1, a1, %lo(j)
slli a5, t0, 2
add a1, a1, a5
lui a2, %hi(d)
addi a2, a2, %lo(d)
add a2, a2, a5
lui a3, %hi(c)
addi a3, a3, %lo(c)
add a3, a3, a5
lui a4, %hi(b)
addi a4, a4, %lo(b)
add a4, a4, a5
lui s1, %hi(a)
addi s1, s1, %lo(a)
add a5, a5, s1
# Scalar loop: two flt.s compares ANDed together and stored as a word. Exits
# when the counter a6 wraps from -1 to 0 (bgeu against its previous value a0).
.LBB0_6: # %for.body
# =>This Inner Loop Header: Depth=1
flw ft0, 0(a5)
flw ft1, 0(a4)
flw ft2, 0(a3)
flw ft3, 0(a2)
flt.s a7, ft0, ft1
mv a0, a6
flt.s s1, ft2, ft3
and s1, a7, s1
sw s1, 0(a1)
addi a6, a6, 1
addi a1, a1, 4
addi a2, a2, 4
addi a3, a3, 4
addi a4, a4, 4
addi a5, a5, 4
bgeu a6, a0, .LBB0_6
# Exit: release the vector spill area and restore ra, s0, s1, s2.
.LBB0_7: # %for.end
csrr a0, vlenb
addi a1, zero, 12
mul a0, a0, a1
add sp, sp, a0
ld s2, 16(sp) # 8-byte Folded Reload
ld s1, 24(sp) # 8-byte Folded Reload
ld s0, 32(sp) # 8-byte Folded Reload
ld ra, 40(sp) # 8-byte Folded Reload
addi sp, sp, 48
ret
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
# -- End function
.type a,@object # @a
.bss
.globl a
.p2align 2
a:
.zero 4096
.size a, 4096
.type b,@object # @b
.globl b
.p2align 2
b:
.zero 4096
.size b, 4096
.type c,@object # @c
.globl c
.p2align 2
c:
.zero 4096
.size c, 4096
.type d,@object # @d
.globl d
.p2align 2
d:
.zero 4096
.size d, 4096
.type j,@object # @j
.globl j
.p2align 2
j:
.zero 4096
.size j, 4096
.ident "clang version 14.0.0 (https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi.git 235eb59f1632601d2b2ef584bb997d256cbfa47d)"
.section ".note.GNU-stack","",@progbits
```