diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s index 5aacf47f..7ae92d69 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s @@ -103,8 +103,7 @@ _ZL23BM_PRESSURE_CALC_LAMBDARN9benchmark5StateE: # @_ZL23BM_PRESSURE_CALC_LAMBDA andi $a6, $a6, 1 movgr2fr.d $fa5, $zero xvrepli.b $xr6, 0 - lu52i.d $a7, $zero, 1023 - xvreplgr2vr.d $xr7, $a7 + xvldi $xr7, -912 b .LBB0_5 .p2align 4, , 16 .LBB0_4: # %"_Z6forallIZL23BM_PRESSURE_CALC_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit._Z6forallIZL23BM_PRESSURE_CALC_LAMBDARN9benchmark5StateEE3$_1Ev9simd_execiiT_.exit_crit_edge.us.us" @@ -534,14 +533,9 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vldi $vr16, -864 vldi $vr17, -996 vldi $vr18, -872 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr19, $a0 - lu52i.d $a0, $zero, -1023 - xvreplgr2vr.d $xr20, $a0 - ori $a0, $zero, 0 - lu32i.d $a0, -524288 - lu52i.d $a0, $a0, 1024 - xvreplgr2vr.d $xr21, $a0 + xvldi $xr19, -928 + xvldi $xr20, -880 + xvldi $xr21, -1016 vldi $vr22, -912 b .LBB1_4 .p2align 4, , 16 @@ -579,7 +573,6 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b ld.d $a4, $sp, 376 # 8-byte Folded Reload ld.d $a5, $sp, 384 # 8-byte Folded Reload ld.d $a6, $sp, 416 # 8-byte Folded Reload - lu52i.d $fp, $zero, -1026 .p2align 4, , 16 .LBB1_8: # %vector.body288 # Parent Loop BB1_4 Depth=1 @@ -594,7 +587,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b xvld $xr7, $a3, 0 xvfadd.d $xr2, $xr2, $xr4 xvfadd.d $xr3, $xr3, $xr5 - xvreplgr2vr.d $xr4, $fp + xvldi $xr4, -800 xvfmul.d $xr5, $xr6, $xr4 xvfmul.d $xr4, $xr7, $xr4 xvld $xr6, $a2, -32 @@ -922,29 +915,29 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b ld.d $a2, $sp, 504 # 8-byte Folded Reload ld.d $a3, $sp, 472 # 8-byte Folded Reload move $a4, $a7 - ld.d $s2, $sp, 496 # 8-byte Folded Reload - ld.d $fp, $sp, 488 # 8-byte Folded Reload + ld.d $s1, $sp, 496 # 8-byte Folded Reload + ld.d $s8, $sp, 488 # 8-byte Folded Reload move $s5, $t1 - move $s4, $t3 - ld.d $s8, $sp, 512 # 8-byte Folded Reload - move $s6, $t2 - move $s0, $t6 - move $s1, $ra + move $fp, $t3 + ld.d $s4, $sp, 512 # 8-byte Folded Reload + move $s0, $t2 + move $s6, $t6 + move $s2, $ra ld.d $a0, $sp, 456 # 8-byte Folded Reload ld.d $a1, $sp, 448 # 8-byte Folded Reload b .LBB1_37 .p2align 4, , 16 .LBB1_35: # %._crit_edge.i # in Loop: Header=BB1_37 Depth=2 - fld.d $fs4, $s0, 0 - fld.d $fs5, $s1, 0 + fld.d $fs4, $s6, 0 + fld.d $fs5, $s2, 0 fmov.d $fa0, $fs2 .LBB1_36: # %"_ZZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEENK3$_4clEi.exit" # in Loop: Header=BB1_37 Depth=2 fld.d $fa1, $s5, 0 - fld.d $fa2, $s4, 0 - fld.d $fa3, $s8, 0 - fld.d $fa4, $s6, 0 + fld.d $fa2, $fp, 0 + fld.d $fa3, $s4, 0 + fld.d $fa4, $s0, 0 fadd.d $fa1, $fa1, $fa2 fadd.d $fa2, $fa3, $fa4 fmul.d $fa2, $fa2, $ft8 @@ -959,15 +952,15 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b fsel $fa0, $fa0, $fs2, $fcc0 fcmp.clt.d $fcc0, $fa0, $ft1 fsel $fa0, $fa0, $ft1, $fcc0 - fst.d $fa0, $s0, 0 - addi.d $s1, $s1, 8 - addi.d $s0, $s0, 8 + fst.d $fa0, $s6, 0 + addi.d $s2, $s2, 8 addi.d $s6, $s6, 8 - addi.d $s8, $s8, 8 + addi.d 
$s0, $s0, 8 addi.d $s4, $s4, 8 - addi.d $s5, $s5, 8 addi.d $fp, $fp, 8 - addi.d $s2, $s2, 8 + addi.d $s5, $s5, 8 + addi.d $s8, $s8, 8 + addi.d $s1, $s1, 8 addi.d $a4, $a4, 8 addi.d $a3, $a3, 8 addi.d $a2, $a2, 8 @@ -984,8 +977,8 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b fld.d $fa0, $a2, 0 fld.d $fa1, $a3, 0 fld.d $fa2, $a4, 0 - fld.d $fs4, $s0, 0 - fld.d $fs5, $s1, 0 + fld.d $fs4, $s6, 0 + fld.d $fs5, $s2, 0 fmul.d $fa1, $fa1, $fa1 fmul.d $fa1, $fa1, $fa2 fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) @@ -1050,8 +1043,8 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b .LBB1_41: # in Loop: Header=BB1_37 Depth=2 fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) .LBB1_42: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $s2, 0 - fld.d $fa2, $fp, 0 + fld.d $fa1, $s1, 0 + fld.d $fa2, $s8, 0 fmadd.d $fa0, $fa0, $fa1, $fa2 b .LBB1_36 .p2align 4, , 16 @@ -1856,105 +1849,103 @@ _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE: # @_ZL24BM_DEL_DOT_VEC_2D_LAMB alsl.d $a4, $s6, $s2, 3 addi.d $a5, $s3, 8 alsl.d $a6, $s6, $a5, 3 - alsl.d $a7, $s6, $s3, 3 - alsl.d $t0, $s6, $s4, 3 - ld.d $t1, $sp, 80 - pcalau12i $t2, %pc_hi20(.LCPI3_0) - vld $vr0, $t2, %pc_lo12(.LCPI3_0) - pcalau12i $t2, %pc_hi20(.LCPI3_1) - fld.d $fa1, $t2, %pc_lo12(.LCPI3_1) + ld.d $a7, $sp, 80 + pcalau12i $t0, %pc_hi20(.LCPI3_0) + vld $vr0, $t0, %pc_lo12(.LCPI3_0) + pcalau12i $t0, %pc_hi20(.LCPI3_1) + fld.d $fa1, $t0, %pc_lo12(.LCPI3_1) + alsl.d $t0, $s6, $s3, 3 + alsl.d $t1, $s6, $s4, 3 vldi $vr2, -928 - lu52i.d $t2, $zero, 1022 - vreplgr2vr.d $vr3, $t2 .p2align 4, , 16 .LBB3_5: # %.lr.ph.us # =>This Loop Header: Depth=1 # Child Loop BB3_6 Depth 2 move $t2, $a0 - move $t3, $t1 + move $t3, $a7 .p2align 4, , 16 .LBB3_6: # Parent Loop BB3_5 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $t4, $t3, 0 slli.d $t4, $t4, 3 - vldx $vr7, $a3, $t4 - fldx.d $fa4, $a2, $t4 - fldx.d $fa5, $a3, $t4 - fldx.d $fa6, $s1, $t4 - fldx.d $ft0, $a1, $t4 - vldx $vr9, $s1, $t4 - fadd.d $fa4, $fa4, $fa5 - fsub.d $fa4, $fa4, $fa6 - fsub.d $fa4, $fa4, $ft0 - fmul.d $fa5, $fa4, $fa2 - vldx $vr8, $a4, $t4 - fldx.d $fa4, $a4, $t4 - vldx $vr10, $t0, $t4 - vldx $vr11, $s2, $t4 - fldx.d $fa6, $s2, $t4 - vpackod.d $vr12, $vr9, $vr8 - vpackev.d $vr13, $vr10, $vr8 + vldx $vr6, $a3, $t4 + fldx.d $fa3, $a2, $t4 + fldx.d $fa4, $a3, $t4 + fldx.d $fa5, $s1, $t4 + fldx.d $fa7, $a1, $t4 + vldx $vr8, $s1, $t4 + fadd.d $fa3, $fa3, $fa4 + fsub.d $fa3, $fa3, $fa5 + fsub.d $fa3, $fa3, $fa7 + fmul.d $fa4, $fa3, $fa2 + vldx $vr7, $a4, $t4 + fldx.d $fa3, $a4, $t4 + vldx $vr9, $t1, $t4 + vldx $vr10, $s2, $t4 + fldx.d $fa5, $s2, $t4 + vpackod.d $vr11, $vr8, $vr7 + vpackev.d $vr12, $vr9, $vr7 + vshuf4i.d $vr7, $vr6, 12 + vldx $vr13, $s4, $t4 + vfadd.d $vr7, $vr11, $vr7 + vpackev.d $vr6, $vr6, $vr10 + vfsub.d $vr6, $vr7, $vr6 + vpackev.d $vr7, $vr13, $vr10 + vshuf4i.d $vr10, $vr8, 9 + vfsub.d $vr6, $vr6, $vr10 + vfmul.d $vr6, $vr6, $vr2 + vldx $vr8, $t0, $t4 + fldx.d $ft6, $a6, $t4 + fldx.d $ft7, $t0, $t4 + fldx.d $ft8, $s3, $t4 + fldx.d $ft9, $a5, $t4 + vldx $vr18, $s3, $t4 + fadd.d $ft6, $ft6, $ft7 + fsub.d $ft6, $ft6, $ft8 + fsub.d $ft6, $ft6, $ft9 + fmul.d $ft6, $ft6, $fa2 + fldx.d $ft7, $t1, $t4 + fldx.d $ft8, $s4, $t4 + vshuf4i.d $vr10, $vr13, 12 + vshuf4i.d $vr11, $vr9, 12 + vfadd.d $vr11, $vr10, $vr11 + vfsub.d $vr12, $vr11, $vr12 + vfsub.d $vr12, $vr12, $vr7 + vpackod.d $vr13, $vr9, $vr18 + vori.b $vr17, $vr8, 0 + vshuf4i.d $vr17, $vr9, 9 + vfadd.d $vr9, $vr13, $vr17 vshuf4i.d $vr8, $vr7, 12 - vldx $vr14, $s4, $t4 - vfadd.d $vr8, 
$vr12, $vr8 - vpackev.d $vr7, $vr7, $vr11 - vfsub.d $vr7, $vr8, $vr7 - vpackev.d $vr8, $vr14, $vr11 - vshuf4i.d $vr11, $vr9, 9 - vfsub.d $vr7, $vr7, $vr11 - vfmul.d $vr7, $vr7, $vr3 - vldx $vr9, $a7, $t4 - fldx.d $ft7, $a6, $t4 - fldx.d $ft8, $a7, $t4 - fldx.d $ft9, $s3, $t4 - fldx.d $ft10, $a5, $t4 - vldx $vr19, $s3, $t4 - fadd.d $ft7, $ft7, $ft8 - fsub.d $ft7, $ft7, $ft9 - fsub.d $ft7, $ft7, $ft10 - fmul.d $ft7, $ft7, $fa2 - fldx.d $ft8, $t0, $t4 - fldx.d $ft9, $s4, $t4 - vshuf4i.d $vr11, $vr14, 12 - vshuf4i.d $vr12, $vr10, 12 - vfadd.d $vr12, $vr11, $vr12 - vfsub.d $vr13, $vr12, $vr13 - vfsub.d $vr13, $vr13, $vr8 - vpackod.d $vr14, $vr10, $vr19 - vori.b $vr18, $vr9, 0 - vshuf4i.d $vr18, $vr10, 9 - vfadd.d $vr10, $vr14, $vr18 - vshuf4i.d $vr9, $vr8, 12 - vfsub.d $vr8, $vr10, $vr9 - vshuf4i.d $vr19, $vr11, 12 - vfsub.d $vr8, $vr8, $vr19 - vfmul.d $vr8, $vr8, $vr3 - vfmul.d $vr9, $vr13, $vr0 - vreplvei.d $vr10, $vr9, 0 - fmul.d $ft2, $fa5, $ft2 - vreplvei.d $vr11, $vr7, 0 - vreplvei.d $vr13, $vr7, 1 - fmadd.d $ft2, $ft5, $ft3, $ft2 - fadd.d $ft2, $ft2, $fa1 - frecip.d $ft2, $ft2 - fneg.d $fa5, $fa5 - vextrins.d $vr15, $vr5, 16 - vfmul.d $vr5, $vr9, $vr15 - vfmadd.d $vr5, $vr8, $vr7, $vr5 - vreplvei.d $vr7, $vr10, 0 - vfmul.d $vr5, $vr7, $vr5 - vreplvei.d $vr7, $vr12, 1 - fadd.d $fa7, $fa7, $ft8 - fadd.d $fa7, $fa7, $ft9 - vreplvei.d $vr8, $vr12, 0 - fadd.d $fa4, $ft0, $fa4 - fadd.d $fa4, $fa4, $fa6 - fdiv.d $fa4, $fa7, $fa4 - vreplvei.d $vr6, $vr5, 0 - vreplvei.d $vr5, $vr5, 1 - fadd.d $fa5, $fa6, $fa5 - fadd.d $fa4, $fa4, $fa5 - fstx.d $fa4, $s0, $t4 + vfsub.d $vr7, $vr9, $vr8 + vshuf4i.d $vr18, $vr10, 12 + vfsub.d $vr7, $vr7, $vr18 + vfmul.d $vr7, $vr7, $vr2 + vfmul.d $vr8, $vr12, $vr0 + vreplvei.d $vr9, $vr8, 0 + fmul.d $ft1, $fa4, $ft1 + vreplvei.d $vr10, $vr6, 0 + vreplvei.d $vr12, $vr6, 1 + fmadd.d $ft1, $ft4, $ft2, $ft1 + fadd.d $ft1, $ft1, $fa1 + frecip.d $ft1, $ft1 + fneg.d $fa4, $fa4 + vextrins.d $vr14, $vr4, 16 + vfmul.d $vr4, $vr8, $vr14 + vfmadd.d $vr4, $vr7, $vr6, $vr4 + vreplvei.d $vr6, $vr9, 0 + vfmul.d $vr4, $vr6, $vr4 + vreplvei.d $vr6, $vr11, 1 + fadd.d $fa6, $fa6, $ft7 + fadd.d $fa6, $fa6, $ft8 + vreplvei.d $vr7, $vr11, 0 + fadd.d $fa3, $fa7, $fa3 + fadd.d $fa3, $fa3, $fa5 + fdiv.d $fa3, $fa6, $fa3 + vreplvei.d $vr5, $vr4, 0 + vreplvei.d $vr4, $vr4, 1 + fadd.d $fa4, $fa5, $fa4 + fadd.d $fa3, $fa3, $fa4 + fstx.d $fa3, $s0, $t4 addi.d $t2, $t2, -1 addi.d $t3, $t3, 4 bnez $t2, .LBB3_6 @@ -2944,11 +2935,8 @@ _ZL13BM_FIR_LAMBDARN9benchmark5StateE: # @_ZL13BM_FIR_LAMBDARN9benchmark5StateE andi $a3, $a3, 1 vldi $vr0, -1016 movgr2fr.d $fa1, $zero - ori $a4, $zero, 0 - lu32i.d $a4, -524288 - lu52i.d $a4, $a4, 1024 - xvreplgr2vr.d $xr2, $a4 - xvrepli.b $xr3, 0 + xvrepli.b $xr2, 0 + xvldi $xr3, -1016 b .LBB5_5 .p2align 4, , 16 .LBB5_4: # %"._Z6forallIZL13BM_FIR_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us" @@ -2976,7 +2964,7 @@ _ZL13BM_FIR_LAMBDARN9benchmark5StateE: # @_ZL13BM_FIR_LAMBDARN9benchmark5StateE xvld $xr4, $a6, -64 xvld $xr5, $a6, -56 xvld $xr6, $a6, -48 - xvfmadd.d $xr4, $xr4, $xr2, $xr3 + xvfmadd.d $xr4, $xr4, $xr3, $xr2 xvld $xr7, $a6, -40 xvfsub.d $xr4, $xr4, $xr5 xvfsub.d $xr4, $xr4, $xr6 @@ -2986,7 +2974,7 @@ _ZL13BM_FIR_LAMBDARN9benchmark5StateE: # @_ZL13BM_FIR_LAMBDARN9benchmark5StateE xvld $xr7, $a6, -16 xvfsub.d $xr4, $xr4, $xr5 xvld $xr5, $a6, -8 - xvfmadd.d $xr4, $xr6, $xr2, $xr4 + xvfmadd.d $xr4, $xr6, $xr3, $xr4 xvfsub.d $xr4, $xr4, $xr7 xvld $xr6, $a6, 0 xvfsub.d $xr4, $xr4, $xr5 @@ -2995,7 +2983,7 @@ 
_ZL13BM_FIR_LAMBDARN9benchmark5StateE: # @_ZL13BM_FIR_LAMBDARN9benchmark5StateE xvfsub.d $xr4, $xr4, $xr6 xvld $xr6, $a6, 24 xvfsub.d $xr4, $xr4, $xr5 - xvfmadd.d $xr4, $xr7, $xr2, $xr4 + xvfmadd.d $xr4, $xr7, $xr3, $xr4 xvld $xr5, $a6, 32 xvfsub.d $xr4, $xr4, $xr6 xvld $xr6, $a6, 40 @@ -3005,7 +2993,7 @@ _ZL13BM_FIR_LAMBDARN9benchmark5StateE: # @_ZL13BM_FIR_LAMBDARN9benchmark5StateE xvfsub.d $xr4, $xr4, $xr6 xvfsub.d $xr4, $xr4, $xr7 addi.d $a6, $a6, 32 - xvfmadd.d $xr4, $xr5, $xr2, $xr4 + xvfmadd.d $xr4, $xr5, $xr3, $xr4 xvst $xr4, $a4, 0 addi.d $a5, $a5, -4 addi.d $a4, $a4, 32 diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s index 62e07b70..8a2b5bab 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s @@ -101,8 +101,7 @@ _ZL20BM_PRESSURE_CALC_RAWRN9benchmark5StateE: # @_ZL20BM_PRESSURE_CALC_RAWRN9ben andi $a6, $a6, 1 movgr2fr.d $fa5, $zero xvrepli.b $xr6, 0 - lu52i.d $a7, $zero, 1023 - xvreplgr2vr.d $xr7, $a7 + xvldi $xr7, -912 b .LBB0_5 .p2align 4, , 16 .LBB0_4: # %._crit_edge.us.us @@ -525,14 +524,9 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vldi $vr16, -864 vldi $vr17, -996 vldi $vr18, -872 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr19, $a0 - lu52i.d $a0, $zero, -1023 - xvreplgr2vr.d $xr20, $a0 - ori $a0, $zero, 0 - lu32i.d $a0, -524288 - lu52i.d $a0, $a0, 1024 - xvreplgr2vr.d $xr21, $a0 + xvldi $xr19, -928 + xvldi $xr20, -880 + xvldi $xr21, -1016 vldi $vr22, -912 st.d $s1, $sp, 504 # 8-byte Folded Spill b .LBB1_4 @@ -571,7 +565,6 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma ld.d $a4, $sp, 384 # 8-byte Folded Reload ld.d $a5, $sp, 392 # 8-byte Folded Reload ld.d $a6, $sp, 424 # 8-byte Folded Reload - lu52i.d $fp, $zero, -1026 .p2align 4, , 16 .LBB1_8: # %vector.body325 # Parent Loop BB1_4 Depth=1 @@ -586,7 +579,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma xvld $xr7, $a3, 0 xvfadd.d $xr2, $xr2, $xr4 xvfadd.d $xr3, $xr3, $xr5 - xvreplgr2vr.d $xr4, $fp + xvldi $xr4, -800 xvfmul.d $xr5, $xr6, $xr4 xvfmul.d $xr4, $xr7, $xr4 xvld $xr6, $a2, -32 @@ -918,17 +911,17 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma # in Loop: Header=BB1_4 Depth=1 move $s7, $a3 move $s3, $t7 - move $s6, $ra - move $s0, $t1 - ld.d $s1, $sp, 512 # 8-byte Folded Reload - move $fp, $t0 + move $s0, $ra + move $s6, $t1 + ld.d $s2, $sp, 512 # 8-byte Folded Reload + move $s1, $t0 ld.d $a2, $sp, 496 # 8-byte Folded Reload ld.d $a4, $sp, 472 # 8-byte Folded Reload move $a6, $a7 - ld.d $s8, $sp, 488 # 8-byte Folded Reload - ld.d $s2, $sp, 480 # 8-byte Folded Reload + ld.d $s4, $sp, 488 # 8-byte Folded Reload + ld.d $s8, $sp, 480 # 8-byte Folded Reload move $s5, $t5 - move $s4, $t8 + move $fp, $t8 ld.d $a0, $sp, 456 # 8-byte Folded Reload ld.d $a1, $sp, 448 # 8-byte Folded Reload b .LBB1_37 @@ -936,13 +929,13 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma .LBB1_35: # %.lr.ph235._crit_edge # in Loop: Header=BB1_37 Depth=2 fld.d $fs4, $s5, 0 - fld.d $fs5, $s4, 0 + fld.d $fs5, $fp, 0 fmov.d $fa0, $fs2 .LBB1_36: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $s6, 0 - fld.d $fa2, $s0, 0 - fld.d $fa3, $s1, 0 - fld.d $fa4, $fp, 0 + 
fld.d $fa1, $s0, 0 + fld.d $fa2, $s6, 0 + fld.d $fa3, $s2, 0 + fld.d $fa4, $s1, 0 fadd.d $fa1, $fa1, $fa2 fadd.d $fa2, $fa3, $fa4 fmul.d $fa2, $fa2, $ft8 @@ -958,17 +951,17 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma fcmp.clt.d $fcc0, $fa0, $ft1 fsel $fa0, $fa0, $ft1, $fcc0 fst.d $fa0, $s5, 0 - addi.d $s4, $s4, 8 + addi.d $fp, $fp, 8 addi.d $s5, $s5, 8 - addi.d $s2, $s2, 8 addi.d $s8, $s8, 8 + addi.d $s4, $s4, 8 addi.d $a6, $a6, 8 addi.d $a4, $a4, 8 addi.d $a2, $a2, 8 - addi.d $fp, $fp, 8 addi.d $s1, $s1, 8 - addi.d $s0, $s0, 8 + addi.d $s2, $s2, 8 addi.d $s6, $s6, 8 + addi.d $s0, $s0, 8 addi.d $s7, $s7, -1 addi.d $s3, $s3, 8 beqz $s7, .LBB1_43 @@ -983,7 +976,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma fld.d $fa1, $a4, 0 fld.d $fa2, $a6, 0 fld.d $fs4, $s5, 0 - fld.d $fs5, $s4, 0 + fld.d $fs5, $fp, 0 fmul.d $fa1, $fa1, $fa1 fmul.d $fa1, $fa1, $fa2 fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) @@ -1050,8 +1043,8 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma .LBB1_41: # in Loop: Header=BB1_37 Depth=2 fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) .LBB1_42: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $s8, 0 - fld.d $fa2, $s2, 0 + fld.d $fa1, $s4, 0 + fld.d $fa2, $s8, 0 fmadd.d $fa0, $fa0, $fa1, $fa2 b .LBB1_36 .p2align 4, , 16 @@ -1852,105 +1845,103 @@ _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE: # @_ZL21BM_DEL_DOT_VEC_2D_RAWRN9b alsl.d $a4, $s6, $s2, 3 addi.d $a5, $s3, 8 alsl.d $a6, $s6, $a5, 3 - alsl.d $a7, $s6, $s3, 3 - alsl.d $t0, $s6, $s4, 3 - ld.d $t1, $sp, 80 - pcalau12i $t2, %pc_hi20(.LCPI3_0) - vld $vr0, $t2, %pc_lo12(.LCPI3_0) - pcalau12i $t2, %pc_hi20(.LCPI3_1) - fld.d $fa1, $t2, %pc_lo12(.LCPI3_1) + ld.d $a7, $sp, 80 + pcalau12i $t0, %pc_hi20(.LCPI3_0) + vld $vr0, $t0, %pc_lo12(.LCPI3_0) + pcalau12i $t0, %pc_hi20(.LCPI3_1) + fld.d $fa1, $t0, %pc_lo12(.LCPI3_1) + alsl.d $t0, $s6, $s3, 3 + alsl.d $t1, $s6, $s4, 3 vldi $vr2, -928 - lu52i.d $t2, $zero, 1022 - vreplgr2vr.d $vr3, $t2 .p2align 4, , 16 .LBB3_5: # %.preheader.us # =>This Loop Header: Depth=1 # Child Loop BB3_6 Depth 2 move $t2, $a0 - move $t3, $t1 + move $t3, $a7 .p2align 4, , 16 .LBB3_6: # Parent Loop BB3_5 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $t4, $t3, 0 slli.d $t4, $t4, 3 - vldx $vr7, $a3, $t4 - fldx.d $fa4, $a2, $t4 - fldx.d $fa5, $a3, $t4 - fldx.d $fa6, $s1, $t4 - fldx.d $ft0, $a1, $t4 - vldx $vr9, $s1, $t4 - fadd.d $fa4, $fa4, $fa5 - fsub.d $fa4, $fa4, $fa6 - fsub.d $fa4, $fa4, $ft0 - fmul.d $fa5, $fa4, $fa2 - vldx $vr8, $a4, $t4 - fldx.d $fa4, $a4, $t4 - vldx $vr10, $t0, $t4 - vldx $vr11, $s2, $t4 - fldx.d $fa6, $s2, $t4 - vpackod.d $vr12, $vr9, $vr8 - vpackev.d $vr13, $vr10, $vr8 + vldx $vr6, $a3, $t4 + fldx.d $fa3, $a2, $t4 + fldx.d $fa4, $a3, $t4 + fldx.d $fa5, $s1, $t4 + fldx.d $fa7, $a1, $t4 + vldx $vr8, $s1, $t4 + fadd.d $fa3, $fa3, $fa4 + fsub.d $fa3, $fa3, $fa5 + fsub.d $fa3, $fa3, $fa7 + fmul.d $fa4, $fa3, $fa2 + vldx $vr7, $a4, $t4 + fldx.d $fa3, $a4, $t4 + vldx $vr9, $t1, $t4 + vldx $vr10, $s2, $t4 + fldx.d $fa5, $s2, $t4 + vpackod.d $vr11, $vr8, $vr7 + vpackev.d $vr12, $vr9, $vr7 + vshuf4i.d $vr7, $vr6, 12 + vldx $vr13, $s4, $t4 + vfadd.d $vr7, $vr11, $vr7 + vpackev.d $vr6, $vr6, $vr10 + vfsub.d $vr6, $vr7, $vr6 + vpackev.d $vr7, $vr13, $vr10 + vshuf4i.d $vr10, $vr8, 9 + vfsub.d $vr6, $vr6, $vr10 + vfmul.d $vr6, $vr6, $vr2 + vldx $vr8, $t0, $t4 + fldx.d $ft6, $a6, $t4 + fldx.d $ft7, $t0, $t4 + fldx.d $ft8, $s3, $t4 + fldx.d $ft9, $a5, $t4 + vldx $vr18, $s3, $t4 + fadd.d $ft6, $ft6, $ft7 + 
fsub.d $ft6, $ft6, $ft8 + fsub.d $ft6, $ft6, $ft9 + fmul.d $ft6, $ft6, $fa2 + fldx.d $ft7, $t1, $t4 + fldx.d $ft8, $s4, $t4 + vshuf4i.d $vr10, $vr13, 12 + vshuf4i.d $vr11, $vr9, 12 + vfadd.d $vr11, $vr10, $vr11 + vfsub.d $vr12, $vr11, $vr12 + vfsub.d $vr12, $vr12, $vr7 + vpackod.d $vr13, $vr9, $vr18 + vori.b $vr17, $vr8, 0 + vshuf4i.d $vr17, $vr9, 9 + vfadd.d $vr9, $vr13, $vr17 vshuf4i.d $vr8, $vr7, 12 - vldx $vr14, $s4, $t4 - vfadd.d $vr8, $vr12, $vr8 - vpackev.d $vr7, $vr7, $vr11 - vfsub.d $vr7, $vr8, $vr7 - vpackev.d $vr8, $vr14, $vr11 - vshuf4i.d $vr11, $vr9, 9 - vfsub.d $vr7, $vr7, $vr11 - vfmul.d $vr7, $vr7, $vr3 - vldx $vr9, $a7, $t4 - fldx.d $ft7, $a6, $t4 - fldx.d $ft8, $a7, $t4 - fldx.d $ft9, $s3, $t4 - fldx.d $ft10, $a5, $t4 - vldx $vr19, $s3, $t4 - fadd.d $ft7, $ft7, $ft8 - fsub.d $ft7, $ft7, $ft9 - fsub.d $ft7, $ft7, $ft10 - fmul.d $ft7, $ft7, $fa2 - fldx.d $ft8, $t0, $t4 - fldx.d $ft9, $s4, $t4 - vshuf4i.d $vr11, $vr14, 12 - vshuf4i.d $vr12, $vr10, 12 - vfadd.d $vr12, $vr11, $vr12 - vfsub.d $vr13, $vr12, $vr13 - vfsub.d $vr13, $vr13, $vr8 - vpackod.d $vr14, $vr10, $vr19 - vori.b $vr18, $vr9, 0 - vshuf4i.d $vr18, $vr10, 9 - vfadd.d $vr10, $vr14, $vr18 - vshuf4i.d $vr9, $vr8, 12 - vfsub.d $vr8, $vr10, $vr9 - vshuf4i.d $vr19, $vr11, 12 - vfsub.d $vr8, $vr8, $vr19 - vfmul.d $vr8, $vr8, $vr3 - vfmul.d $vr9, $vr13, $vr0 - vreplvei.d $vr10, $vr9, 0 - fmul.d $ft2, $fa5, $ft2 - vreplvei.d $vr11, $vr7, 0 - vreplvei.d $vr13, $vr7, 1 - fmadd.d $ft2, $ft5, $ft3, $ft2 - fadd.d $ft2, $ft2, $fa1 - frecip.d $ft2, $ft2 - fneg.d $fa5, $fa5 - vextrins.d $vr15, $vr5, 16 - vfmul.d $vr5, $vr9, $vr15 - vfmadd.d $vr5, $vr8, $vr7, $vr5 - vreplvei.d $vr7, $vr10, 0 - vfmul.d $vr5, $vr7, $vr5 - vreplvei.d $vr7, $vr12, 1 - fadd.d $fa7, $fa7, $ft8 - fadd.d $fa7, $fa7, $ft9 - vreplvei.d $vr8, $vr12, 0 - fadd.d $fa4, $ft0, $fa4 - fadd.d $fa4, $fa4, $fa6 - fdiv.d $fa4, $fa7, $fa4 - vreplvei.d $vr6, $vr5, 0 - vreplvei.d $vr5, $vr5, 1 - fadd.d $fa5, $fa6, $fa5 - fadd.d $fa4, $fa4, $fa5 - fstx.d $fa4, $s0, $t4 + vfsub.d $vr7, $vr9, $vr8 + vshuf4i.d $vr18, $vr10, 12 + vfsub.d $vr7, $vr7, $vr18 + vfmul.d $vr7, $vr7, $vr2 + vfmul.d $vr8, $vr12, $vr0 + vreplvei.d $vr9, $vr8, 0 + fmul.d $ft1, $fa4, $ft1 + vreplvei.d $vr10, $vr6, 0 + vreplvei.d $vr12, $vr6, 1 + fmadd.d $ft1, $ft4, $ft2, $ft1 + fadd.d $ft1, $ft1, $fa1 + frecip.d $ft1, $ft1 + fneg.d $fa4, $fa4 + vextrins.d $vr14, $vr4, 16 + vfmul.d $vr4, $vr8, $vr14 + vfmadd.d $vr4, $vr7, $vr6, $vr4 + vreplvei.d $vr6, $vr9, 0 + vfmul.d $vr4, $vr6, $vr4 + vreplvei.d $vr6, $vr11, 1 + fadd.d $fa6, $fa6, $ft7 + fadd.d $fa6, $fa6, $ft8 + vreplvei.d $vr7, $vr11, 0 + fadd.d $fa3, $fa7, $fa3 + fadd.d $fa3, $fa3, $fa5 + fdiv.d $fa3, $fa6, $fa3 + vreplvei.d $vr5, $vr4, 0 + vreplvei.d $vr4, $vr4, 1 + fadd.d $fa4, $fa5, $fa4 + fadd.d $fa3, $fa3, $fa4 + fstx.d $fa3, $s0, $t4 addi.d $t2, $t2, -1 addi.d $t3, $t3, 4 bnez $t2, .LBB3_6 @@ -2928,11 +2919,8 @@ _ZL10BM_FIR_RAWRN9benchmark5StateE: # @_ZL10BM_FIR_RAWRN9benchmark5StateE andi $a3, $a3, 1 vldi $vr0, -1016 movgr2fr.d $fa1, $zero - ori $a4, $zero, 0 - lu32i.d $a4, -524288 - lu52i.d $a4, $a4, 1024 - xvreplgr2vr.d $xr2, $a4 - xvrepli.b $xr3, 0 + xvrepli.b $xr2, 0 + xvldi $xr3, -1016 b .LBB5_5 .p2align 4, , 16 .LBB5_4: # %._crit_edge.us @@ -2960,7 +2948,7 @@ _ZL10BM_FIR_RAWRN9benchmark5StateE: # @_ZL10BM_FIR_RAWRN9benchmark5StateE xvld $xr4, $a6, -64 xvld $xr5, $a6, -56 xvld $xr6, $a6, -48 - xvfmadd.d $xr4, $xr4, $xr2, $xr3 + xvfmadd.d $xr4, $xr4, $xr3, $xr2 xvld $xr7, $a6, -40 xvfsub.d $xr4, $xr4, $xr5 xvfsub.d $xr4, $xr4, $xr6 @@ 
-2970,7 +2958,7 @@ _ZL10BM_FIR_RAWRN9benchmark5StateE: # @_ZL10BM_FIR_RAWRN9benchmark5StateE xvld $xr7, $a6, -16 xvfsub.d $xr4, $xr4, $xr5 xvld $xr5, $a6, -8 - xvfmadd.d $xr4, $xr6, $xr2, $xr4 + xvfmadd.d $xr4, $xr6, $xr3, $xr4 xvfsub.d $xr4, $xr4, $xr7 xvld $xr6, $a6, 0 xvfsub.d $xr4, $xr4, $xr5 @@ -2979,7 +2967,7 @@ _ZL10BM_FIR_RAWRN9benchmark5StateE: # @_ZL10BM_FIR_RAWRN9benchmark5StateE xvfsub.d $xr4, $xr4, $xr6 xvld $xr6, $a6, 24 xvfsub.d $xr4, $xr4, $xr5 - xvfmadd.d $xr4, $xr7, $xr2, $xr4 + xvfmadd.d $xr4, $xr7, $xr3, $xr4 xvld $xr5, $a6, 32 xvfsub.d $xr4, $xr4, $xr6 xvld $xr6, $a6, 40 @@ -2989,7 +2977,7 @@ _ZL10BM_FIR_RAWRN9benchmark5StateE: # @_ZL10BM_FIR_RAWRN9benchmark5StateE xvfsub.d $xr4, $xr4, $xr6 xvfsub.d $xr4, $xr4, $xr7 addi.d $a6, $a6, 32 - xvfmadd.d $xr4, $xr5, $xr2, $xr4 + xvfmadd.d $xr4, $xr5, $xr3, $xr4 xvst $xr4, $a4, 0 addi.d $a5, $a5, -4 addi.d $a4, $a4, 32 diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s index 4c437241..b3386a69 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s @@ -1854,8 +1854,7 @@ _ZL16BM_PIC_1D_LAMBDARN9benchmark5StateE: # @_ZL16BM_PIC_1D_LAMBDARN9benchmark5S addi.w $a3, $a3, 0 movgr2fr.d $fa1, $zero vldi $vr2, -912 - ori $a4, $zero, 2047 - vreplgr2vr.w $vr3, $a4 + vldi $vr3, -2553 b .LBB12_4 .p2align 4, , 16 .LBB12_3: # %"_Z6forallIZL16BM_PIC_1D_LAMBDARN9benchmark5StateEE3$_2Ev9simd_execiiT_.exit" diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s index 090c556e..52968fa1 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s @@ -1837,8 +1837,7 @@ _ZL13BM_PIC_1D_RAWRN9benchmark5StateE: # @_ZL13BM_PIC_1D_RAWRN9benchmark5StateE xvreplve0.d $xr0, $xr9 movgr2fr.d $fa1, $zero vldi $vr2, -912 - ori $a3, $zero, 2047 - vreplgr2vr.w $vr3, $a3 + vldi $vr3, -2553 b .LBB12_4 .p2align 4, , 16 .LBB12_3: # %._crit_edge diff --git a/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s b/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s index 8934e7ba..67e6f1ab 100644 --- a/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s +++ b/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s @@ -17,9 +17,6 @@ .type _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_,@function _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ # %bb.0: - addi.w $t2, $zero, -1 - blt $a0, $t2, .LBB0_141 -# %bb.1: # %.preheader317.lr.ph addi.d $sp, $sp, -144 st.d $ra, $sp, 136 # 8-byte Folded Spill st.d $fp, $sp, 128 # 8-byte Folded Spill @@ -32,46 +29,50 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha st.d $s6, $sp, 72 # 8-byte Folded Spill st.d $s7, $sp, 64 # 8-byte Folded Spill st.d $s8, $sp, 56 # 8-byte Folded Spill + addi.w $t2, $zero, -1 + st.d $a0, $sp, 48 # 8-byte Folded Spill + blt $a0, $t2, .LBB0_140 +# %bb.1: # %.preheader317.lr.ph 
move $s8, $a7 ld.d $t0, $sp, 144 - st.d $a3, $sp, 24 # 8-byte Folded Spill blt $a1, $t2, .LBB0_65 # %bb.2: # %.preheader317.us.preheader move $t1, $a5 + move $t8, $a2 move $s2, $zero - st.d $a0, $sp, 8 # 8-byte Folded Spill + ld.d $a0, $sp, 48 # 8-byte Folded Reload addi.w $a5, $a0, 2 - st.d $a1, $sp, 16 # 8-byte Folded Spill addi.w $t3, $a1, 2 ori $a0, $zero, 1 - slt $a1, $a0, $a5 - masknez $a0, $a0, $a1 - st.d $a5, $sp, 40 # 8-byte Folded Spill - maskeqz $a1, $a5, $a1 - or $s3, $a1, $a0 + slt $a2, $a0, $a5 + masknez $a0, $a0, $a2 + st.d $a5, $sp, 32 # 8-byte Folded Spill + maskeqz $a2, $a5, $a2 + or $s3, $a2, $a0 bstrpick.d $t2, $t3, 31, 0 slli.d $a0, $s3, 13 - alsl.d $a1, $s3, $a0, 3 - st.d $a1, $sp, 32 # 8-byte Folded Spill - add.d $a1, $t1, $a1 - alsl.d $a1, $t2, $a1, 2 + alsl.d $a2, $s3, $a0, 3 + st.d $a2, $sp, 24 # 8-byte Folded Spill + add.d $a2, $t1, $a2 + alsl.d $a2, $t2, $a2, 2 lu12i.w $a7, -3 ori $a5, $a7, 4088 - move $a3, $a5 - add.d $a1, $a1, $a5 + st.d $a5, $sp, 16 # 8-byte Folded Spill + add.d $a2, $a2, $a5 alsl.d $a0, $s3, $a0, 4 - add.d $a0, $a2, $a0 + add.d $a0, $t8, $a0 alsl.d $a0, $t2, $a0, 2 lu12i.w $a5, 2 ori $s4, $a5, 24 - add.d $t8, $a0, $s4 - sltu $a0, $t1, $t8 - sltu $a1, $a2, $a1 - and $a0, $a0, $a1 - bstrpick.d $a1, $t3, 31, 3 - slli.d $t6, $a1, 3 - add.d $s6, $a2, $s4 - st.d $t3, $sp, 48 # 8-byte Folded Spill + add.d $a0, $a0, $s4 + st.d $a0, $sp, 8 # 8-byte Folded Spill + sltu $a0, $t1, $a0 + sltu $a2, $t8, $a2 + and $a0, $a0, $a2 + bstrpick.d $a2, $t3, 31, 3 + slli.d $t7, $a2, 3 + add.d $s6, $t8, $s4 + st.d $t3, $sp, 40 # 8-byte Folded Spill sltui $fp, $t3, 8 or $a0, $fp, $a0 andi $s7, $a0, 1 @@ -84,14 +85,14 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha pcalau12i $a0, %pc_hi20(.LCPI0_1) fld.s $fa1, $a0, %pc_lo12(.LCPI0_1) ori $t3, $a5, 8 - ori $a0, $a7, 4072 - lu12i.w $a1, 252586 - ori $a1, $a1, 2731 - xvreplgr2vr.w $xr2, $a1 - lu12i.w $a1, 254634 - ori $a1, $a1, 2731 - xvreplgr2vr.w $xr3, $a1 - move $ra, $a2 + ori $ra, $a7, 4072 + lu12i.w $a0, 252586 + ori $a0, $a0, 2731 + xvreplgr2vr.w $xr2, $a0 + lu12i.w $a0, 254634 + ori $a0, $a0, 2731 + xvreplgr2vr.w $xr3, $a0 + move $a2, $t8 move $t4, $t1 b .LBB0_4 .p2align 4, , 16 @@ -100,7 +101,7 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha addi.d $s2, $s2, 1 add.d $t4, $t4, $t3 add.d $s6, $s6, $t5 - add.d $ra, $ra, $t5 + add.d $a2, $a2, $t5 beq $s2, $s3, .LBB0_11 .LBB0_4: # %.preheader317.us # =>This Loop Header: Depth=1 @@ -113,20 +114,20 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .p2align 4, , 16 .LBB0_6: # %vector.body.preheader # in Loop: Header=BB0_4 Depth=1 - move $a1, $s6 - move $s5, $t6 + move $a0, $s6 + move $s5, $t7 move $a5, $t4 .p2align 4, , 16 .LBB0_7: # %vector.body # Parent Loop BB0_4 Depth=1 # => This Inner Loop Header: Depth=2 - xvldx $xr4, $a1, $a0 - ori $t7, $a7, 4080 - xvldx $xr5, $a1, $t7 - xvldx $xr6, $a1, $t3 - xvld $xr7, $a1, -8 - xvldx $xr8, $a1, $t5 - xvld $xr9, $a1, 0 + xvldx $xr4, $a0, $ra + ori $t6, $a7, 4080 + xvldx $xr5, $a0, $t6 + xvldx $xr6, $a0, $t3 + xvld $xr7, $a0, -8 + xvldx $xr8, $a0, $t5 + xvld $xr9, $a0, 0 xvfadd.s $xr4, $xr4, $xr6 xvfsub.s $xr4, $xr5, $xr4 xvfadd.s $xr4, $xr4, $xr8 @@ -136,27 +137,27 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha xvst $xr4, $a5, 0 addi.d $a5, $a5, 32 addi.d $s5, $s5, -8 - addi.d $a1, $a1, 32 + addi.d $a0, $a0, 32 bnez $s5, .LBB0_7 # %bb.8: # %middle.block # in Loop: Header=BB0_4 Depth=1 - 
move $a5, $t6 - beq $t6, $t2, .LBB0_3 + move $a5, $t7 + beq $t7, $t2, .LBB0_3 .LBB0_9: # %scalar.ph.preheader # in Loop: Header=BB0_4 Depth=1 - sub.d $a1, $t2, $a5 + sub.d $a0, $t2, $a5 slli.d $a5, $a5, 2 .p2align 4, , 16 .LBB0_10: # %scalar.ph # Parent Loop BB0_4 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t7, $ra, $a5 - fldx.s $fa4, $ra, $a5 - fld.s $fa5, $t7, 8 - fldx.s $fa6, $t7, $s0 - fldx.s $fa7, $t7, $t5 - fldx.s $ft0, $t7, $s1 - fldx.s $ft1, $t7, $s4 + add.d $t6, $a2, $a5 + fldx.s $fa4, $a2, $a5 + fld.s $fa5, $t6, 8 + fldx.s $fa6, $t6, $s0 + fldx.s $fa7, $t6, $t5 + fldx.s $ft0, $t6, $s1 + fldx.s $ft1, $t6, $s4 fadd.s $fa4, $fa4, $fa6 fsub.s $fa4, $fa5, $fa4 fadd.s $fa4, $fa4, $ft0 @@ -164,100 +165,102 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha fmul.s $fa5, $fa5, $fa0 fmadd.s $fa4, $fa4, $fa1, $fa5 fstx.s $fa4, $t4, $a5 - addi.d $a1, $a1, -1 + addi.d $a0, $a0, -1 addi.d $a5, $a5, 4 - bnez $a1, .LBB0_10 + bnez $a0, .LBB0_10 b .LBB0_3 .LBB0_11: # %.preheader315.us.preheader move $a7, $zero ori $a0, $zero, 1 - ld.d $a5, $sp, 40 # 8-byte Folded Reload - slt $a1, $a0, $a5 - masknez $a0, $a0, $a1 - maskeqz $a1, $a5, $a1 - or $s2, $a1, $a0 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a5, $sp, 32 # 8-byte Folded Reload + slt $a2, $a0, $a5 + masknez $a0, $a0, $a2 + maskeqz $a2, $a5, $a2 + or $s2, $a2, $a0 + ld.d $a0, $sp, 24 # 8-byte Folded Reload add.d $a0, $a4, $a0 alsl.d $a0, $t2, $a0, 2 - add.d $a0, $a0, $a3 - sltu $a1, $a4, $t8 - sltu $a0, $a2, $a0 - and $a1, $a1, $a0 - bstrpick.d $a0, $t2, 31, 3 - slli.d $a0, $a0, 3 - add.d $t6, $a2, $s1 - add.d $a2, $a2, $s0 - or $a1, $fp, $a1 - andi $t7, $a1, 1 - lu12i.w $t8, -5 - move $fp, $a4 - ld.d $ra, $sp, 48 # 8-byte Folded Reload + ld.d $a2, $sp, 16 # 8-byte Folded Reload + add.d $a0, $a0, $a2 + ld.d $a2, $sp, 8 # 8-byte Folded Reload + sltu $a2, $a4, $a2 + sltu $a0, $t8, $a0 + and $a0, $a2, $a0 + bstrpick.d $a2, $t2, 31, 3 + slli.d $t6, $a2, 3 + add.d $t7, $t8, $s1 + add.d $a2, $t8, $s0 + or $a0, $fp, $a0 + andi $t8, $a0, 1 + lu12i.w $fp, -5 + move $s0, $a4 + ld.d $ra, $sp, 40 # 8-byte Folded Reload b .LBB0_13 .p2align 4, , 16 .LBB0_12: # %._crit_edge.us322 # in Loop: Header=BB0_13 Depth=1 addi.d $a7, $a7, 1 - add.d $fp, $fp, $t3 - add.d $t6, $t6, $t5 + add.d $s0, $s0, $t3 + add.d $t7, $t7, $t5 add.d $a2, $a2, $t5 beq $a7, $s2, .LBB0_20 .LBB0_13: # %.preheader315.us # =>This Loop Header: Depth=1 # Child Loop BB0_16 Depth 2 # Child Loop BB0_19 Depth 2 - ori $a5, $t8, 4064 - beqz $t7, .LBB0_15 + ori $a5, $fp, 4064 + beqz $t8, .LBB0_15 # %bb.14: # in Loop: Header=BB0_13 Depth=1 move $t4, $zero b .LBB0_18 .p2align 4, , 16 .LBB0_15: # %vector.body502.preheader # in Loop: Header=BB0_13 Depth=1 - move $a1, $a0 - move $s0, $t6 - move $s1, $fp + move $a0, $t6 + move $s1, $t7 + move $s3, $s0 .p2align 4, , 16 .LBB0_16: # %vector.body502 # Parent Loop BB0_13 Depth=1 # => This Inner Loop Header: Depth=2 - ori $t4, $t8, 4056 - xvldx $xr4, $s0, $t4 - xvld $xr5, $s0, -8 - ori $t4, $t8, 4060 - xvldx $xr6, $s0, $a5 - xvldx $xr7, $s0, $t4 - xvld $xr8, $s0, 0 - xvld $xr9, $s0, -4 + ori $t4, $fp, 4056 + xvldx $xr4, $s1, $t4 + xvld $xr5, $s1, -8 + ori $t4, $fp, 4060 + xvldx $xr6, $s1, $a5 + xvldx $xr7, $s1, $t4 + xvld $xr8, $s1, 0 + xvld $xr9, $s1, -4 xvfadd.s $xr4, $xr4, $xr6 xvfsub.s $xr4, $xr5, $xr4 xvfadd.s $xr4, $xr4, $xr8 xvfsub.s $xr5, $xr9, $xr7 xvfmul.s $xr5, $xr5, $xr3 xvfmadd.s $xr4, $xr4, $xr2, $xr5 - xvst $xr4, $s1, 0 + xvst $xr4, $s3, 0 + addi.d $s3, $s3, 32 + addi.d $a0, $a0, -8 addi.d $s1, 
$s1, 32 - addi.d $a1, $a1, -8 - addi.d $s0, $s0, 32 - bnez $a1, .LBB0_16 + bnez $a0, .LBB0_16 # %bb.17: # %middle.block511 # in Loop: Header=BB0_13 Depth=1 - move $t4, $a0 - beq $a0, $t2, .LBB0_12 + move $t4, $t6 + beq $t6, $t2, .LBB0_12 .LBB0_18: # %scalar.ph497.preheader # in Loop: Header=BB0_13 Depth=1 - sub.d $a1, $t2, $t4 - slli.d $s0, $t4, 2 + sub.d $a0, $t2, $t4 + slli.d $s1, $t4, 2 .p2align 4, , 16 .LBB0_19: # %scalar.ph497 # Parent Loop BB0_13 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t4, $a2, $s0 + add.d $t4, $a2, $s1 fldx.s $fa4, $t4, $a5 - fldx.s $fa5, $a2, $s0 - ori $s1, $t8, 4072 - fldx.s $fa6, $t4, $s1 - ori $s1, $t8, 4068 - fldx.s $fa7, $t4, $s1 + fldx.s $fa5, $a2, $s1 + ori $s3, $fp, 4072 + fldx.s $fa6, $t4, $s3 + ori $s3, $fp, 4068 + fldx.s $fa7, $t4, $s3 fld.s $ft0, $t4, 8 fld.s $ft1, $t4, 4 fadd.s $fa4, $fa4, $fa6 @@ -266,24 +269,24 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha fsub.s $fa5, $ft1, $fa7 fmul.s $fa5, $fa5, $fa0 fmadd.s $fa4, $fa4, $fa1, $fa5 - fstx.s $fa4, $fp, $s0 - addi.d $a1, $a1, -1 - addi.d $s0, $s0, 4 - bnez $a1, .LBB0_19 + fstx.s $fa4, $s0, $s1 + addi.d $a0, $a0, -1 + addi.d $s1, $s1, 4 + bnez $a0, .LBB0_19 b .LBB0_12 .LBB0_20: # %.preheader313.us.preheader move $fp, $zero ori $a0, $zero, 1 - ld.d $a2, $sp, 40 # 8-byte Folded Reload - slt $a1, $a0, $a2 - masknez $a0, $a0, $a1 - maskeqz $a1, $a2, $a1 - or $a7, $a1, $a0 + ld.d $a5, $sp, 32 # 8-byte Folded Reload + slt $a2, $a0, $a5 + masknez $a0, $a0, $a2 + maskeqz $a2, $a5, $a2 + or $a7, $a2, $a0 sub.d $a0, $t0, $t1 - sltui $a1, $ra, 4 + sltui $a2, $ra, 4 sltui $a0, $a0, 64 - st.d $a1, $sp, 32 # 8-byte Folded Spill - or $t5, $a1, $a0 + st.d $a2, $sp, 24 # 8-byte Folded Spill + or $t5, $a2, $a0 bstrpick.d $t6, $t2, 31, 4 slli.d $s0, $t6, 4 andi $a2, $t2, 12 @@ -314,21 +317,21 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha # Child Loop BB0_34 Depth 2 beqz $t5, .LBB0_24 # %bb.23: # in Loop: Header=BB0_22 Depth=1 - move $a1, $zero + move $a5, $zero b .LBB0_33 .p2align 4, , 16 .LBB0_24: # %vector.main.loop.iter.check # in Loop: Header=BB0_22 Depth=1 bgeu $ra, $s5, .LBB0_26 # %bb.25: # in Loop: Header=BB0_22 Depth=1 - move $a5, $zero + move $t4, $zero b .LBB0_30 .p2align 4, , 16 .LBB0_26: # %vector.body522.preheader # in Loop: Header=BB0_22 Depth=1 move $a0, $s3 - move $a1, $s2 - move $a5, $s0 + move $a5, $s2 + move $t4, $s0 .p2align 4, , 16 .LBB0_27: # %vector.body522 # Parent Loop BB0_22 Depth=1 @@ -337,44 +340,44 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha xvld $xr1, $a0, 0 xvfmul.s $xr0, $xr0, $xr0 xvfmul.s $xr1, $xr1, $xr1 - xvst $xr0, $a1, -32 - xvst $xr1, $a1, 0 - addi.d $a5, $a5, -16 - addi.d $a1, $a1, 64 + xvst $xr0, $a5, -32 + xvst $xr1, $a5, 0 + addi.d $t4, $t4, -16 + addi.d $a5, $a5, 64 addi.d $a0, $a0, 64 - bnez $a5, .LBB0_27 + bnez $t4, .LBB0_27 # %bb.28: # %middle.block527 # in Loop: Header=BB0_22 Depth=1 beq $s0, $t2, .LBB0_21 # %bb.29: # %vec.epilog.iter.check # in Loop: Header=BB0_22 Depth=1 + move $t4, $s0 move $a5, $s0 - move $a1, $s0 beqz $a2, .LBB0_33 .LBB0_30: # %vec.epilog.ph # in Loop: Header=BB0_22 Depth=1 - add.d $a0, $s4, $a5 - alsl.d $a1, $a5, $s7, 2 - alsl.d $a5, $a5, $s6, 2 + add.d $a0, $s4, $t4 + alsl.d $a5, $t4, $s7, 2 + alsl.d $t4, $t4, $s6, 2 .p2align 4, , 16 .LBB0_31: # %vec.epilog.vector.body # Parent Loop BB0_22 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr0, $a5, 0 + vld $vr0, $t4, 0 vfmul.s $vr0, $vr0, $vr0 - vst $vr0, $a1, 0 + vst 
$vr0, $a5, 0 addi.d $a0, $a0, 4 - addi.d $a1, $a1, 16 addi.d $a5, $a5, 16 + addi.d $t4, $t4, 16 bnez $a0, .LBB0_31 # %bb.32: # %vec.epilog.middle.block # in Loop: Header=BB0_22 Depth=1 - move $a1, $s1 + move $a5, $s1 beq $s1, $t2, .LBB0_21 .LBB0_33: # %vec.epilog.scalar.ph.preheader # in Loop: Header=BB0_22 Depth=1 - slli.d $a0, $a1, 2 - sub.d $a1, $t2, $a1 + slli.d $a0, $a5, 2 + sub.d $a5, $t2, $a5 .p2align 4, , 16 .LBB0_34: # %vec.epilog.scalar.ph # Parent Loop BB0_22 Depth=1 @@ -382,31 +385,32 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha fldx.s $fa0, $s6, $a0 fmul.s $fa0, $fa0, $fa0 fstx.s $fa0, $s7, $a0 - addi.d $a1, $a1, -1 + addi.d $a5, $a5, -1 addi.d $a0, $a0, 4 - bnez $a1, .LBB0_34 + bnez $a5, .LBB0_34 b .LBB0_21 .LBB0_35: # %.preheader311.us.preheader move $fp, $zero ori $a0, $zero, 1 - ld.d $a5, $sp, 40 # 8-byte Folded Reload - slt $a1, $a0, $a5 - masknez $a0, $a0, $a1 - maskeqz $a1, $a5, $a1 - or $s0, $a1, $a0 + ld.d $a7, $sp, 32 # 8-byte Folded Reload + slt $a5, $a0, $a7 + masknez $a0, $a0, $a5 + maskeqz $a5, $a7, $a5 + or $s0, $a5, $a0 sub.d $a0, $s8, $a4 - sub.d $a1, $s8, $t1 + sub.d $a5, $s8, $t1 sltui $a0, $a0, 64 - sltui $a1, $a1, 64 - or $a0, $a0, $a1 - slli.d $s1, $t6, 4 + sltui $a5, $a5, 64 + or $a0, $a0, $a5 + st.d $t6, $sp, 16 # 8-byte Folded Spill + slli.d $t6, $t6, 4 slli.d $s2, $t7, 2 addi.d $s3, $a4, 32 addi.d $s4, $s8, 32 addi.d $s5, $t1, 32 sub.d $s6, $zero, $t8 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - or $s7, $a1, $a0 + ld.d $a5, $sp, 24 # 8-byte Folded Reload + or $s7, $a5, $a0 ori $t5, $zero, 16 move $ra, $a4 move $a7, $s8 @@ -429,75 +433,75 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha # Child Loop BB0_49 Depth 2 beqz $s7, .LBB0_39 # %bb.38: # in Loop: Header=BB0_37 Depth=1 - move $a1, $zero + move $a5, $zero b .LBB0_48 .p2align 4, , 16 .LBB0_39: # %vector.main.loop.iter.check544 # in Loop: Header=BB0_37 Depth=1 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload bgeu $a0, $t5, .LBB0_41 # %bb.40: # in Loop: Header=BB0_37 Depth=1 - move $t4, $zero + move $s1, $zero b .LBB0_45 .p2align 4, , 16 .LBB0_41: # %vector.body548.preheader # in Loop: Header=BB0_37 Depth=1 move $a0, $s5 - move $a1, $s4 - move $a5, $s3 - move $t4, $s1 + move $a5, $s4 + move $t4, $s3 + move $s1, $t6 .p2align 4, , 16 .LBB0_42: # %vector.body548 # Parent Loop BB0_37 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr0, $a5, -32 - xvld $xr1, $a5, 0 + xvld $xr0, $t4, -32 + xvld $xr1, $t4, 0 xvld $xr2, $a0, -32 xvld $xr3, $a0, 0 xvfmul.s $xr0, $xr2, $xr0 xvfmul.s $xr1, $xr3, $xr1 - xvst $xr0, $a1, -32 - xvst $xr1, $a1, 0 - addi.d $t4, $t4, -16 + xvst $xr0, $a5, -32 + xvst $xr1, $a5, 0 + addi.d $s1, $s1, -16 + addi.d $t4, $t4, 64 addi.d $a5, $a5, 64 - addi.d $a1, $a1, 64 addi.d $a0, $a0, 64 - bnez $t4, .LBB0_42 + bnez $s1, .LBB0_42 # %bb.43: # %middle.block555 # in Loop: Header=BB0_37 Depth=1 - beq $s1, $t2, .LBB0_36 + beq $t6, $t2, .LBB0_36 # %bb.44: # %vec.epilog.iter.check560 # in Loop: Header=BB0_37 Depth=1 - move $t4, $s1 - move $a1, $s1 + move $s1, $t6 + move $a5, $t6 beqz $a2, .LBB0_48 .LBB0_45: # %vec.epilog.ph559 # in Loop: Header=BB0_37 Depth=1 - add.d $a0, $s6, $t4 - alsl.d $a1, $t4, $a7, 2 - alsl.d $a5, $t4, $t1, 2 - alsl.d $t4, $t4, $ra, 2 + add.d $a0, $s6, $s1 + alsl.d $a5, $s1, $a7, 2 + alsl.d $t4, $s1, $t1, 2 + alsl.d $s1, $s1, $ra, 2 .p2align 4, , 16 .LBB0_46: # %vec.epilog.vector.body565 # Parent Loop BB0_37 Depth=1 # => This Inner Loop Header: Depth=2 - vld 
$vr0, $t4, 0 - vld $vr1, $a5, 0 + vld $vr0, $s1, 0 + vld $vr1, $t4, 0 vfmul.s $vr0, $vr1, $vr0 - vst $vr0, $a1, 0 + vst $vr0, $a5, 0 addi.d $a0, $a0, 4 - addi.d $a1, $a1, 16 addi.d $a5, $a5, 16 addi.d $t4, $t4, 16 + addi.d $s1, $s1, 16 bnez $a0, .LBB0_46 # %bb.47: # %vec.epilog.middle.block570 # in Loop: Header=BB0_37 Depth=1 - move $a1, $s2 + move $a5, $s2 beq $s2, $t2, .LBB0_36 .LBB0_48: # %vec.epilog.scalar.ph558.preheader # in Loop: Header=BB0_37 Depth=1 - slli.d $a0, $a1, 2 - sub.d $a1, $t2, $a1 + slli.d $a0, $a5, 2 + sub.d $a5, $t2, $a5 .p2align 4, , 16 .LBB0_49: # %vec.epilog.scalar.ph558 # Parent Loop BB0_37 Depth=1 @@ -506,32 +510,31 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha fldx.s $fa1, $t1, $a0 fmul.s $fa0, $fa1, $fa0 fstx.s $fa0, $a7, $a0 - addi.d $a1, $a1, -1 + addi.d $a5, $a5, -1 addi.d $a0, $a0, 4 - bnez $a1, .LBB0_49 + bnez $a5, .LBB0_49 b .LBB0_36 .LBB0_50: # %.preheader309.us.preheader move $a5, $zero ori $a0, $zero, 1 - ld.d $a3, $sp, 40 # 8-byte Folded Reload - slt $a1, $a0, $a3 - masknez $a0, $a0, $a1 - maskeqz $a1, $a3, $a1 - or $a7, $a1, $a0 + ld.d $t1, $sp, 32 # 8-byte Folded Reload + slt $a7, $a0, $t1 + masknez $a0, $a0, $a7 + maskeqz $a7, $t1, $a7 + or $a7, $a7, $a0 sub.d $a0, $a6, $a4 sltui $a0, $a0, 64 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - or $t4, $a1, $a0 - slli.d $t5, $t6, 4 + ld.d $t1, $sp, 24 # 8-byte Folded Reload + or $t4, $t1, $a0 + ld.d $a0, $sp, 16 # 8-byte Folded Reload + slli.d $t5, $a0, 4 slli.d $t6, $t7, 2 addi.d $t7, $a6, 32 addi.d $fp, $a4, 32 sub.d $t8, $zero, $t8 ori $s0, $zero, 16 move $s1, $a6 - ld.d $a1, $sp, 16 # 8-byte Folded Reload - ld.d $a0, $sp, 8 # 8-byte Folded Reload - ld.d $s4, $sp, 48 # 8-byte Folded Reload + ld.d $s3, $sp, 40 # 8-byte Folded Reload b .LBB0_52 .p2align 4, , 16 .LBB0_51: # %._crit_edge.us331 @@ -549,161 +552,162 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha # Child Loop BB0_64 Depth 2 beqz $t4, .LBB0_54 # %bb.53: # in Loop: Header=BB0_52 Depth=1 - move $s2, $zero + move $t1, $zero b .LBB0_63 .p2align 4, , 16 .LBB0_54: # %vector.main.loop.iter.check580 # in Loop: Header=BB0_52 Depth=1 - bgeu $s4, $s0, .LBB0_56 + bgeu $s3, $s0, .LBB0_56 # %bb.55: # in Loop: Header=BB0_52 Depth=1 - move $t1, $zero + move $s2, $zero b .LBB0_60 .p2align 4, , 16 .LBB0_56: # %vector.body584.preheader # in Loop: Header=BB0_52 Depth=1 - move $s3, $fp - move $s2, $t7 - move $t1, $t5 + move $a0, $fp + move $t1, $t7 + move $s2, $t5 .p2align 4, , 16 .LBB0_57: # %vector.body584 # Parent Loop BB0_52 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr0, $s3, -32 - xvld $xr1, $s3, 0 + xvld $xr0, $a0, -32 + xvld $xr1, $a0, 0 xvfmul.s $xr0, $xr0, $xr0 xvfmul.s $xr1, $xr1, $xr1 - xvst $xr0, $s2, -32 - xvst $xr1, $s2, 0 - addi.d $t1, $t1, -16 - addi.d $s2, $s2, 64 - addi.d $s3, $s3, 64 - bnez $t1, .LBB0_57 + xvst $xr0, $t1, -32 + xvst $xr1, $t1, 0 + addi.d $s2, $s2, -16 + addi.d $t1, $t1, 64 + addi.d $a0, $a0, 64 + bnez $s2, .LBB0_57 # %bb.58: # %middle.block589 # in Loop: Header=BB0_52 Depth=1 beq $t5, $t2, .LBB0_51 # %bb.59: # %vec.epilog.iter.check594 # in Loop: Header=BB0_52 Depth=1 - move $t1, $t5 move $s2, $t5 + move $t1, $t5 beqz $a2, .LBB0_63 .LBB0_60: # %vec.epilog.ph593 # in Loop: Header=BB0_52 Depth=1 - add.d $s3, $t8, $t1 - alsl.d $s2, $t1, $s1, 2 - alsl.d $t1, $t1, $a4, 2 + add.d $a0, $t8, $s2 + alsl.d $t1, $s2, $s1, 2 + alsl.d $s2, $s2, $a4, 2 .p2align 4, , 16 .LBB0_61: # %vec.epilog.vector.body599 # Parent Loop BB0_52 Depth=1 # => This Inner Loop 
Header: Depth=2 - vld $vr0, $t1, 0 + vld $vr0, $s2, 0 vfmul.s $vr0, $vr0, $vr0 - vst $vr0, $s2, 0 - addi.d $s3, $s3, 4 - addi.d $s2, $s2, 16 + vst $vr0, $t1, 0 + addi.d $a0, $a0, 4 addi.d $t1, $t1, 16 - bnez $s3, .LBB0_61 + addi.d $s2, $s2, 16 + bnez $a0, .LBB0_61 # %bb.62: # %vec.epilog.middle.block603 # in Loop: Header=BB0_52 Depth=1 - move $s2, $t6 + move $t1, $t6 beq $t6, $t2, .LBB0_51 .LBB0_63: # %vec.epilog.scalar.ph592.preheader # in Loop: Header=BB0_52 Depth=1 - slli.d $s3, $s2, 2 - sub.d $t1, $t2, $s2 + slli.d $a0, $t1, 2 + sub.d $t1, $t2, $t1 .p2align 4, , 16 .LBB0_64: # %vec.epilog.scalar.ph592 # Parent Loop BB0_52 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $fa0, $a4, $s3 + fldx.s $fa0, $a4, $a0 fmul.s $fa0, $fa0, $fa0 - fstx.s $fa0, $s1, $s3 + fstx.s $fa0, $s1, $a0 addi.d $t1, $t1, -1 - addi.d $s3, $s3, 4 + addi.d $a0, $a0, 4 bnez $t1, .LBB0_64 b .LBB0_51 .LBB0_65: # %.preheader308 + ld.d $a0, $sp, 48 # 8-byte Folded Reload blez $a0, .LBB0_140 # %bb.66: # %.preheader308 blez $a1, .LBB0_140 # %bb.67: # %.preheader307.us.preheader move $s5, $zero - ld.d $a3, $sp, 184 + ld.d $a2, $sp, 184 ld.d $a4, $sp, 176 ld.d $t1, $sp, 168 - ld.d $a2, $sp, 160 + ld.d $a5, $sp, 160 ld.d $t3, $sp, 152 - slli.d $t2, $a0, 13 - add.d $a5, $t1, $t2 - alsl.d $a5, $a1, $a5, 2 - lu12i.w $a7, -2 - add.d $a5, $a5, $a7 - addi.d $a7, $t0, 4 - alsl.d $t2, $a0, $t2, 3 - add.d $t2, $t0, $t2 - alsl.d $t2, $a1, $t2, 2 - lu12i.w $s2, 2 - ori $t4, $s2, 16 - add.d $t2, $t2, $t4 - sltu $t2, $t1, $t2 - sltu $a5, $a7, $a5 - and $a5, $t2, $a5 + ld.d $t4, $sp, 48 # 8-byte Folded Reload + slli.d $a0, $t4, 13 + add.d $a7, $t1, $a0 + alsl.d $a7, $a1, $a7, 2 + lu12i.w $t2, -2 + add.d $a7, $a7, $t2 + addi.d $t2, $t0, 4 + alsl.d $a0, $t4, $a0, 3 + add.d $a0, $t0, $a0 + alsl.d $t5, $a1, $a0, 2 + lu12i.w $a0, 2 + ori $t4, $a0, 16 + add.d $t5, $t5, $t4 + sltu $t5, $t1, $t5 + sltu $a7, $t2, $a7 + and $t2, $t5, $a7 bstrpick.d $a7, $a1, 30, 3 - slli.d $a7, $a7, 3 - ori $t5, $s2, 12 - add.d $s3, $t0, $t5 - sltui $t2, $a1, 8 - st.d $t2, $sp, 48 # 8-byte Folded Spill - or $a5, $t2, $a5 - andi $t2, $a5, 1 - ori $t6, $s2, 8 - lu12i.w $a5, 4 - ori $t7, $a5, 16 - ori $t8, $a5, 20 - ori $fp, $a5, 24 + slli.d $s3, $a7, 3 + ori $t5, $a0, 12 + add.d $a7, $t0, $t5 + sltui $t6, $a1, 8 + st.d $t6, $sp, 40 # 8-byte Folded Spill + or $t2, $t6, $t2 + andi $t2, $t2, 1 + ori $t6, $a0, 8 + lu12i.w $fp, 4 + ori $t7, $fp, 16 + ori $t8, $fp, 20 + ori $fp, $fp, 24 vldi $vr0, -1280 - lu12i.w $ra, -3 - ori $s0, $ra, 4088 - ori $s1, $ra, 4092 - ori $s2, $s2, 4 - lu12i.w $a5, 262144 - xvreplgr2vr.w $xr1, $a5 + lu12i.w $s1, -3 + ori $s0, $s1, 4088 + ori $s1, $s1, 4092 + ori $s2, $a0, 4 + xvldi $xr1, -3264 move $s4, $t1 b .LBB0_69 .p2align 4, , 16 .LBB0_68: # %._crit_edge.us334 # in Loop: Header=BB0_69 Depth=1 addi.d $s5, $s5, 1 - lu12i.w $a5, 2 - add.d $s4, $s4, $a5 - add.d $s3, $s3, $t6 + add.d $s4, $s4, $a0 + add.d $a7, $a7, $t6 add.d $t0, $t0, $t6 - beq $s5, $a0, .LBB0_76 + ld.d $s6, $sp, 48 # 8-byte Folded Reload + beq $s5, $s6, .LBB0_76 .LBB0_69: # %.preheader307.us # =>This Loop Header: Depth=1 # Child Loop BB0_72 Depth 2 # Child Loop BB0_75 Depth 2 beqz $t2, .LBB0_71 # %bb.70: # in Loop: Header=BB0_69 Depth=1 - move $s6, $zero + move $s7, $zero b .LBB0_74 .p2align 4, , 16 .LBB0_71: # %vector.body618.preheader # in Loop: Header=BB0_69 Depth=1 - move $a5, $s3 move $s6, $a7 - move $s7, $s4 + move $s7, $s3 + move $ra, $s4 .p2align 4, , 16 .LBB0_72: # %vector.body618 # Parent Loop BB0_69 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr2, 
$a5, -4 - xvldx $xr3, $a5, $s0 - xvldx $xr4, $a5, $s1 - xvld $xr5, $a5, 0 - xvld $xr6, $a5, 4 - xvldx $xr7, $a5, $s2 - xvldx $xr8, $a5, $t6 - xvldx $xr9, $a5, $t5 + xvld $xr2, $s6, -4 + xvldx $xr3, $s6, $s0 + xvldx $xr4, $s6, $s1 + xvld $xr5, $s6, 0 + xvld $xr6, $s6, 4 + xvldx $xr7, $s6, $s2 + xvldx $xr8, $s6, $t6 + xvldx $xr9, $s6, $t5 xvfmadd.s $xr2, $xr2, $xr1, $xr3 xvfadd.s $xr2, $xr2, $xr4 xvfadd.s $xr2, $xr2, $xr5 @@ -711,32 +715,32 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha xvfadd.s $xr2, $xr2, $xr7 xvfadd.s $xr2, $xr2, $xr8 xvfadd.s $xr2, $xr2, $xr9 - xvst $xr2, $s7, 0 - addi.d $s7, $s7, 32 - addi.d $s6, $s6, -8 - addi.d $a5, $a5, 32 - bnez $s6, .LBB0_72 + xvst $xr2, $ra, 0 + addi.d $ra, $ra, 32 + addi.d $s7, $s7, -8 + addi.d $s6, $s6, 32 + bnez $s7, .LBB0_72 # %bb.73: # %middle.block629 # in Loop: Header=BB0_69 Depth=1 - move $s6, $a7 - beq $a7, $a1, .LBB0_68 + move $s7, $s3 + beq $s3, $a1, .LBB0_68 .LBB0_74: # %scalar.ph613.preheader # in Loop: Header=BB0_69 Depth=1 - slli.d $a5, $s6, 2 - sub.d $s6, $a1, $s6 + slli.d $s6, $s7, 2 + sub.d $s7, $a1, $s7 .p2align 4, , 16 .LBB0_75: # %scalar.ph613 # Parent Loop BB0_69 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s7, $t0, $a5 - fldx.s $fa2, $s7, $t6 - fld.s $fa3, $s7, 4 - fld.s $fa4, $s7, 8 - fldx.s $fa5, $s7, $t5 - fldx.s $fa6, $s7, $t4 - fldx.s $fa7, $s7, $t7 - fldx.s $ft0, $s7, $t8 - fldx.s $ft1, $s7, $fp + add.d $ra, $t0, $s6 + fldx.s $fa2, $ra, $t6 + fld.s $fa3, $ra, 4 + fld.s $fa4, $ra, 8 + fldx.s $fa5, $ra, $t5 + fldx.s $fa6, $ra, $t4 + fldx.s $fa7, $ra, $t7 + fldx.s $ft0, $ra, $t8 + fldx.s $ft1, $ra, $fp fmadd.s $fa2, $fa2, $fa0, $fa3 fadd.s $fa2, $fa2, $fa4 fadd.s $fa2, $fa2, $fa5 @@ -744,57 +748,56 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha fadd.s $fa2, $fa2, $fa7 fadd.s $fa2, $fa2, $ft0 fadd.s $fa2, $fa2, $ft1 - fstx.s $fa2, $s4, $a5 - addi.d $s6, $s6, -1 - addi.d $a5, $a5, 4 - bnez $s6, .LBB0_75 + fstx.s $fa2, $s4, $s6 + addi.d $s7, $s7, -1 + addi.d $s6, $s6, 4 + bnez $s7, .LBB0_75 b .LBB0_68 .LBB0_76: # %.preheader306 - blez $a0, .LBB0_140 + ld.d $a7, $sp, 48 # 8-byte Folded Reload + blez $a7, .LBB0_140 # %bb.77: # %.preheader305.us.preheader - move $t0, $a1 - move $a1, $a0 - move $t2, $ra move $ra, $zero - slli.d $s5, $a0, 13 - add.d $a5, $a2, $s5 - alsl.d $a5, $t0, $a5, 2 - lu12i.w $a0, -2 - add.d $a5, $a5, $a0 - move $a0, $a1 - alsl.d $a1, $a1, $s5, 3 - st.d $a1, $sp, 40 # 8-byte Folded Spill - add.d $a7, $s8, $a1 - alsl.d $a7, $t0, $a7, 2 - add.d $a7, $a7, $t4 - sltu $a7, $a2, $a7 - sltu $a5, $s8, $a5 - and $a5, $a7, $a5 - move $a1, $t0 - bstrpick.d $a7, $t0, 30, 3 - st.d $a7, $sp, 32 # 8-byte Folded Spill - slli.d $a7, $a7, 3 - add.d $s6, $s8, $t5 ld.d $t0, $sp, 48 # 8-byte Folded Reload - or $a5, $t0, $a5 - andi $s7, $a5, 1 - ori $t0, $t2, 4084 - move $a5, $a2 + slli.d $s3, $t0, 13 + add.d $a7, $a5, $s3 + alsl.d $a7, $a1, $a7, 2 + lu12i.w $t2, -2 + add.d $a7, $a7, $t2 + st.d $s3, $sp, 32 # 8-byte Folded Spill + alsl.d $t0, $t0, $s3, 3 + st.d $t0, $sp, 24 # 8-byte Folded Spill + add.d $t0, $s8, $t0 + alsl.d $t0, $a1, $t0, 2 + add.d $t0, $t0, $t4 + sltu $t0, $a5, $t0 + sltu $a7, $s8, $a7 + and $a7, $t0, $a7 + bstrpick.d $t0, $a1, 30, 3 + st.d $t0, $sp, 16 # 8-byte Folded Spill + slli.d $s5, $t0, 3 + add.d $s6, $s8, $t5 + ld.d $t0, $sp, 40 # 8-byte Folded Reload + or $a7, $t0, $a7 + andi $a7, $a7, 1 + lu12i.w $t0, -3 + ori $t0, $t0, 4084 + move $s7, $a5 b .LBB0_79 .p2align 4, , 16 .LBB0_78: # %._crit_edge.us337 # in Loop: 
Header=BB0_79 Depth=1 addi.d $ra, $ra, 1 - lu12i.w $t2, 2 - add.d $a5, $a5, $t2 + add.d $s7, $s7, $a0 add.d $s6, $s6, $t6 add.d $s8, $s8, $t6 - beq $ra, $a0, .LBB0_86 + ld.d $t2, $sp, 48 # 8-byte Folded Reload + beq $ra, $t2, .LBB0_86 .LBB0_79: # %.preheader305.us # =>This Loop Header: Depth=1 # Child Loop BB0_82 Depth 2 # Child Loop BB0_85 Depth 2 - beqz $s7, .LBB0_81 + beqz $a7, .LBB0_81 # %bb.80: # in Loop: Header=BB0_79 Depth=1 move $t2, $zero b .LBB0_84 @@ -802,8 +805,8 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .LBB0_81: # %vector.body643.preheader # in Loop: Header=BB0_79 Depth=1 move $s4, $s6 - move $s3, $a7 - move $t2, $a5 + move $s3, $s5 + move $t2, $s7 .p2align 4, , 16 .LBB0_82: # %vector.body643 # Parent Loop BB0_79 Depth=1 @@ -832,8 +835,8 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha bnez $s3, .LBB0_82 # %bb.83: # %middle.block655 # in Loop: Header=BB0_79 Depth=1 - move $t2, $a7 - beq $a7, $a1, .LBB0_78 + move $t2, $s5 + beq $s5, $a1, .LBB0_78 .LBB0_84: # %scalar.ph638.preheader # in Loop: Header=BB0_79 Depth=1 sub.d $s3, $a1, $t2 @@ -860,140 +863,139 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha fadd.s $fa0, $fa0, $fa2 fadd.s $fa0, $fa0, $fa3 fadd.s $fa0, $fa0, $fa4 - fstx.s $fa0, $a5, $t2 + fstx.s $fa0, $s7, $t2 addi.d $s3, $s3, -1 addi.d $t2, $t2, 4 bnez $s3, .LBB0_85 b .LBB0_78 .LBB0_86: # %.preheader303.us.preheader - move $s7, $zero - add.d $a5, $t3, $s5 - alsl.d $a5, $a1, $a5, 2 - lu12i.w $a7, -2 - add.d $a5, $a5, $a7 - ld.d $a7, $sp, 40 # 8-byte Folded Reload - add.d $a7, $a6, $a7 - alsl.d $a7, $a1, $a7, 2 - add.d $a7, $a7, $t4 - sltu $a7, $t3, $a7 - sltu $a5, $a6, $a5 - and $a5, $a7, $a5 - ld.d $a7, $sp, 32 # 8-byte Folded Reload - slli.d $a7, $a7, 3 - add.d $s3, $a6, $t5 - ld.d $t2, $sp, 48 # 8-byte Folded Reload - or $a5, $t2, $a5 - andi $s4, $a5, 1 + move $a7, $zero + ld.d $t2, $sp, 32 # 8-byte Folded Reload + add.d $t2, $t3, $t2 + alsl.d $t2, $a1, $t2, 2 + lu12i.w $s3, -2 + add.d $t2, $t2, $s3 + ld.d $s3, $sp, 24 # 8-byte Folded Reload + add.d $s3, $a6, $s3 + alsl.d $s3, $a1, $s3, 2 + add.d $s3, $s3, $t4 + sltu $s3, $t3, $s3 + sltu $t2, $a6, $t2 + and $t2, $s3, $t2 + ld.d $s3, $sp, 16 # 8-byte Folded Reload + slli.d $s3, $s3, 3 + add.d $s4, $a6, $t5 + ld.d $s5, $sp, 40 # 8-byte Folded Reload + or $t2, $s5, $t2 + andi $s5, $t2, 1 move $t2, $t3 - move $ra, $a0 - lu12i.w $a0, 2 b .LBB0_88 .p2align 4, , 16 .LBB0_87: # %._crit_edge.us340 # in Loop: Header=BB0_88 Depth=1 - addi.d $s7, $s7, 1 + addi.d $a7, $a7, 1 add.d $t2, $t2, $a0 - add.d $s3, $s3, $t6 + add.d $s4, $s4, $t6 add.d $a6, $a6, $t6 - beq $s7, $ra, .LBB0_95 + ld.d $s6, $sp, 48 # 8-byte Folded Reload + beq $a7, $s6, .LBB0_95 .LBB0_88: # %.preheader303.us # =>This Loop Header: Depth=1 # Child Loop BB0_91 Depth 2 # Child Loop BB0_94 Depth 2 - beqz $s4, .LBB0_90 + beqz $s5, .LBB0_90 # %bb.89: # in Loop: Header=BB0_88 Depth=1 - move $a5, $zero + move $s7, $zero b .LBB0_93 .p2align 4, , 16 .LBB0_90: # %vector.body669.preheader # in Loop: Header=BB0_88 Depth=1 - move $s5, $s3 - move $s6, $a7 - move $a5, $t2 + move $s6, $s4 + move $s8, $s3 + move $s7, $t2 .p2align 4, , 16 .LBB0_91: # %vector.body669 # Parent Loop BB0_88 Depth=1 # => This Inner Loop Header: Depth=2 - xvldx $xr0, $s5, $t0 - xvldx $xr1, $s5, $s0 - xvldx $xr2, $s5, $s1 - xvld $xr3, $s5, -4 - xvld $xr4, $s5, 0 + xvldx $xr0, $s6, $t0 + xvldx $xr1, $s6, $s0 + xvldx $xr2, $s6, $s1 + xvld $xr3, $s6, -4 + xvld $xr4, $s6, 0 xvfadd.s $xr0, $xr1, 
$xr0 xvfadd.s $xr0, $xr0, $xr2 xvfadd.s $xr0, $xr0, $xr3 xvfadd.s $xr0, $xr0, $xr4 - xvld $xr1, $s5, 4 - xvldx $xr2, $s5, $s2 - xvldx $xr3, $s5, $t6 - xvldx $xr4, $s5, $t5 + xvld $xr1, $s6, 4 + xvldx $xr2, $s6, $s2 + xvldx $xr3, $s6, $t6 + xvldx $xr4, $s6, $t5 xvfadd.s $xr0, $xr0, $xr1 xvfadd.s $xr0, $xr0, $xr2 xvfadd.s $xr0, $xr0, $xr3 xvfadd.s $xr0, $xr0, $xr4 - xvst $xr0, $a5, 0 - addi.d $a5, $a5, 32 - addi.d $s6, $s6, -8 - addi.d $s5, $s5, 32 - bnez $s6, .LBB0_91 + xvst $xr0, $s7, 0 + addi.d $s7, $s7, 32 + addi.d $s8, $s8, -8 + addi.d $s6, $s6, 32 + bnez $s8, .LBB0_91 # %bb.92: # %middle.block681 # in Loop: Header=BB0_88 Depth=1 - move $a5, $a7 - beq $a7, $a1, .LBB0_87 + move $s7, $s3 + beq $s3, $a1, .LBB0_87 .LBB0_93: # %scalar.ph664.preheader # in Loop: Header=BB0_88 Depth=1 - sub.d $s5, $a1, $a5 - slli.d $a5, $a5, 2 + sub.d $s6, $a1, $s7 + slli.d $s7, $s7, 2 .p2align 4, , 16 .LBB0_94: # %scalar.ph664 # Parent Loop BB0_88 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s6, $a6, $a5 - fldx.s $fa0, $a6, $a5 - fld.s $fa1, $s6, 4 - fld.s $fa2, $s6, 8 - fldx.s $fa3, $s6, $t6 - fldx.s $fa4, $s6, $t5 + add.d $s8, $a6, $s7 + fldx.s $fa0, $a6, $s7 + fld.s $fa1, $s8, 4 + fld.s $fa2, $s8, 8 + fldx.s $fa3, $s8, $t6 + fldx.s $fa4, $s8, $t5 fadd.s $fa0, $fa1, $fa0 fadd.s $fa0, $fa0, $fa2 fadd.s $fa0, $fa0, $fa3 fadd.s $fa0, $fa0, $fa4 - fldx.s $fa1, $s6, $t4 - fldx.s $fa2, $s6, $t7 - fldx.s $fa3, $s6, $t8 - fldx.s $fa4, $s6, $fp + fldx.s $fa1, $s8, $t4 + fldx.s $fa2, $s8, $t7 + fldx.s $fa3, $s8, $t8 + fldx.s $fa4, $s8, $fp fadd.s $fa0, $fa0, $fa1 fadd.s $fa0, $fa0, $fa2 fadd.s $fa0, $fa0, $fa3 fadd.s $fa0, $fa0, $fa4 - fstx.s $fa0, $t2, $a5 - addi.d $s5, $s5, -1 - addi.d $a5, $a5, 4 - bnez $s5, .LBB0_94 + fstx.s $fa0, $t2, $s7 + addi.d $s6, $s6, -1 + addi.d $s7, $s7, 4 + bnez $s6, .LBB0_94 b .LBB0_87 .LBB0_95: # %.preheader301.us.preheader move $t5, $zero - sub.d $a5, $a3, $t3 - sub.d $a6, $a3, $t1 - sltui $a5, $a5, 64 + sub.d $a6, $a2, $t3 + sub.d $a7, $a2, $t1 sltui $a6, $a6, 64 - or $a5, $a5, $a6 + sltui $a7, $a7, 64 + or $t2, $a6, $a7 bstrpick.d $t0, $a1, 30, 4 slli.d $t6, $t0, 4 andi $a6, $a1, 12 bstrpick.d $t4, $a1, 30, 2 slli.d $t7, $t4, 2 addi.d $t8, $t3, 32 - addi.d $fp, $a3, 32 + addi.d $fp, $a2, 32 addi.d $s0, $t1, 32 sub.d $s1, $zero, $t7 sltui $a7, $a1, 4 - or $s2, $a7, $a5 + or $s2, $a7, $t2 ori $s3, $zero, 16 move $s4, $t3 move $s5, $t1 - move $s6, $a3 - lu12i.w $a0, 2 + move $s6, $a2 b .LBB0_97 .p2align 4, , 16 .LBB0_96: # %._crit_edge.us343 @@ -1005,7 +1007,8 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha add.d $s6, $s6, $a0 add.d $s5, $s5, $a0 add.d $s4, $s4, $a0 - beq $t5, $ra, .LBB0_110 + ld.d $t2, $sp, 48 # 8-byte Folded Reload + beq $t5, $t2, .LBB0_110 .LBB0_97: # %iter.check691 # =>This Loop Header: Depth=1 # Child Loop BB0_102 Depth 2 @@ -1013,106 +1016,105 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha # Child Loop BB0_109 Depth 2 beqz $s2, .LBB0_99 # %bb.98: # in Loop: Header=BB0_97 Depth=1 - move $t2, $zero + move $s7, $zero b .LBB0_108 .p2align 4, , 16 .LBB0_99: # %vector.main.loop.iter.check693 # in Loop: Header=BB0_97 Depth=1 bgeu $a1, $s3, .LBB0_101 # %bb.100: # in Loop: Header=BB0_97 Depth=1 - move $s8, $zero + move $ra, $zero b .LBB0_105 .p2align 4, , 16 .LBB0_101: # %vector.body697.preheader # in Loop: Header=BB0_97 Depth=1 - move $a5, $s0 - move $t2, $fp - move $s7, $t8 - move $s8, $t6 + move $t2, $s0 + move $s7, $fp + move $s8, $t8 + move $ra, $t6 .p2align 4, , 16 .LBB0_102: # 
%vector.body697 # Parent Loop BB0_97 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr0, $s7, -32 - xvld $xr1, $s7, 0 - xvld $xr2, $a5, -32 - xvld $xr3, $a5, 0 + xvld $xr0, $s8, -32 + xvld $xr1, $s8, 0 + xvld $xr2, $t2, -32 + xvld $xr3, $t2, 0 xvfadd.s $xr0, $xr2, $xr0 xvfadd.s $xr1, $xr3, $xr1 - xvst $xr0, $t2, -32 - xvst $xr1, $t2, 0 - addi.d $s8, $s8, -16 + xvst $xr0, $s7, -32 + xvst $xr1, $s7, 0 + addi.d $ra, $ra, -16 + addi.d $s8, $s8, 64 addi.d $s7, $s7, 64 addi.d $t2, $t2, 64 - addi.d $a5, $a5, 64 - bnez $s8, .LBB0_102 + bnez $ra, .LBB0_102 # %bb.103: # %middle.block704 # in Loop: Header=BB0_97 Depth=1 beq $t6, $a1, .LBB0_96 # %bb.104: # %vec.epilog.iter.check709 # in Loop: Header=BB0_97 Depth=1 - move $s8, $t6 - move $t2, $t6 + move $ra, $t6 + move $s7, $t6 beqz $a6, .LBB0_108 .LBB0_105: # %vec.epilog.ph708 # in Loop: Header=BB0_97 Depth=1 - add.d $a5, $s1, $s8 - alsl.d $t2, $s8, $s6, 2 - alsl.d $s7, $s8, $s5, 2 - alsl.d $s8, $s8, $s4, 2 + add.d $t2, $s1, $ra + alsl.d $s7, $ra, $s6, 2 + alsl.d $s8, $ra, $s5, 2 + alsl.d $ra, $ra, $s4, 2 .p2align 4, , 16 .LBB0_106: # %vec.epilog.vector.body714 # Parent Loop BB0_97 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr0, $s8, 0 - vld $vr1, $s7, 0 + vld $vr0, $ra, 0 + vld $vr1, $s8, 0 vfadd.s $vr0, $vr1, $vr0 - vst $vr0, $t2, 0 - addi.d $a5, $a5, 4 - addi.d $t2, $t2, 16 + vst $vr0, $s7, 0 + addi.d $t2, $t2, 4 addi.d $s7, $s7, 16 addi.d $s8, $s8, 16 - bnez $a5, .LBB0_106 + addi.d $ra, $ra, 16 + bnez $t2, .LBB0_106 # %bb.107: # %vec.epilog.middle.block719 # in Loop: Header=BB0_97 Depth=1 - move $t2, $t7 + move $s7, $t7 beq $t7, $a1, .LBB0_96 .LBB0_108: # %vec.epilog.scalar.ph707.preheader # in Loop: Header=BB0_97 Depth=1 - slli.d $a5, $t2, 2 - sub.d $t2, $a1, $t2 + slli.d $t2, $s7, 2 + sub.d $s7, $a1, $s7 .p2align 4, , 16 .LBB0_109: # %vec.epilog.scalar.ph707 # Parent Loop BB0_97 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $fa0, $s4, $a5 - fldx.s $fa1, $s5, $a5 + fldx.s $fa0, $s4, $t2 + fldx.s $fa1, $s5, $t2 fadd.s $fa0, $fa1, $fa0 - fstx.s $fa0, $s6, $a5 - addi.d $t2, $t2, -1 - addi.d $a5, $a5, 4 - bnez $t2, .LBB0_109 + fstx.s $fa0, $s6, $t2 + addi.d $s7, $s7, -1 + addi.d $t2, $t2, 4 + bnez $s7, .LBB0_109 b .LBB0_96 .LBB0_110: # %.preheader299.us.preheader move $t5, $zero - sub.d $a5, $a4, $t3 - sub.d $t2, $a4, $t1 - sub.d $t6, $a4, $a2 - sltui $a5, $a5, 64 + sub.d $t2, $a4, $t3 + sub.d $t6, $a4, $t1 + sub.d $t7, $a4, $a5 sltui $t2, $t2, 64 - or $a5, $a5, $t2 - sltui $t2, $t6, 64 - or $t6, $a5, $t2 + sltui $t6, $t6, 64 + or $t2, $t2, $t6 + sltui $t6, $t7, 64 + or $t6, $t2, $t6 slli.d $t7, $t0, 4 slli.d $t8, $t4, 2 addi.d $fp, $t3, 32 addi.d $s0, $a4, 32 addi.d $s1, $t1, 32 - addi.d $s2, $a2, 32 + addi.d $s2, $a5, 32 sub.d $s3, $zero, $t8 ori $s4, $zero, 16 move $s5, $a4 - lu12i.w $a0, 2 b .LBB0_112 .p2align 4, , 16 .LBB0_111: # %._crit_edge.us346 @@ -1123,239 +1125,239 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha add.d $s1, $s1, $a0 add.d $s2, $s2, $a0 add.d $s5, $s5, $a0 - add.d $a2, $a2, $a0 + add.d $a5, $a5, $a0 add.d $t1, $t1, $a0 add.d $t3, $t3, $a0 - beq $t5, $ra, .LBB0_125 + ld.d $t2, $sp, 48 # 8-byte Folded Reload + beq $t5, $t2, .LBB0_125 .LBB0_112: # %iter.check731 # =>This Loop Header: Depth=1 # Child Loop BB0_117 Depth 2 # Child Loop BB0_121 Depth 2 # Child Loop BB0_124 Depth 2 - or $a5, $a7, $t6 - beqz $a5, .LBB0_114 + or $t2, $a7, $t6 + beqz $t2, .LBB0_114 # %bb.113: # in Loop: Header=BB0_112 Depth=1 - move $t2, $zero + move $s6, $zero b .LBB0_123 .p2align 4, , 16 
.LBB0_114: # %vector.main.loop.iter.check733 # in Loop: Header=BB0_112 Depth=1 bgeu $a1, $s4, .LBB0_116 # %bb.115: # in Loop: Header=BB0_112 Depth=1 - move $s8, $zero + move $ra, $zero b .LBB0_120 .p2align 4, , 16 .LBB0_116: # %vector.body737.preheader # in Loop: Header=BB0_112 Depth=1 - move $a5, $s2 - move $t2, $s1 - move $s6, $s0 - move $s7, $fp - move $s8, $t7 + move $t2, $s2 + move $s6, $s1 + move $s7, $s0 + move $s8, $fp + move $ra, $t7 .p2align 4, , 16 .LBB0_117: # %vector.body737 # Parent Loop BB0_112 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr0, $s7, -32 - xvld $xr1, $s7, 0 - xvld $xr2, $a5, -32 - xvld $xr3, $a5, 0 - xvld $xr4, $t2, -32 - xvld $xr5, $t2, 0 + xvld $xr0, $s8, -32 + xvld $xr1, $s8, 0 + xvld $xr2, $t2, -32 + xvld $xr3, $t2, 0 + xvld $xr4, $s6, -32 + xvld $xr5, $s6, 0 xvfmul.s $xr2, $xr2, $xr2 xvfmul.s $xr3, $xr3, $xr3 xvfmsub.s $xr0, $xr4, $xr0, $xr2 xvfmsub.s $xr1, $xr5, $xr1, $xr3 - xvst $xr0, $s6, -32 - xvst $xr1, $s6, 0 - addi.d $s8, $s8, -16 + xvst $xr0, $s7, -32 + xvst $xr1, $s7, 0 + addi.d $ra, $ra, -16 + addi.d $s8, $s8, 64 addi.d $s7, $s7, 64 addi.d $s6, $s6, 64 addi.d $t2, $t2, 64 - addi.d $a5, $a5, 64 - bnez $s8, .LBB0_117 + bnez $ra, .LBB0_117 # %bb.118: # %middle.block746 # in Loop: Header=BB0_112 Depth=1 beq $t7, $a1, .LBB0_111 # %bb.119: # %vec.epilog.iter.check751 # in Loop: Header=BB0_112 Depth=1 - move $s8, $t7 - move $t2, $t7 + move $ra, $t7 + move $s6, $t7 beqz $a6, .LBB0_123 .LBB0_120: # %vec.epilog.ph750 # in Loop: Header=BB0_112 Depth=1 - add.d $a5, $s3, $s8 - alsl.d $t2, $s8, $s5, 2 - alsl.d $s6, $s8, $a2, 2 - alsl.d $s7, $s8, $t1, 2 - alsl.d $s8, $s8, $t3, 2 + add.d $t2, $s3, $ra + alsl.d $s6, $ra, $s5, 2 + alsl.d $s7, $ra, $a5, 2 + alsl.d $s8, $ra, $t1, 2 + alsl.d $ra, $ra, $t3, 2 .p2align 4, , 16 .LBB0_121: # %vec.epilog.vector.body756 # Parent Loop BB0_112 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr0, $s8, 0 - vld $vr1, $s6, 0 - vld $vr2, $s7, 0 + vld $vr0, $ra, 0 + vld $vr1, $s7, 0 + vld $vr2, $s8, 0 vfmul.s $vr1, $vr1, $vr1 vfmsub.s $vr0, $vr2, $vr0, $vr1 - vst $vr0, $t2, 0 - addi.d $a5, $a5, 4 - addi.d $t2, $t2, 16 + vst $vr0, $s6, 0 + addi.d $t2, $t2, 4 addi.d $s6, $s6, 16 addi.d $s7, $s7, 16 addi.d $s8, $s8, 16 - bnez $a5, .LBB0_121 + addi.d $ra, $ra, 16 + bnez $t2, .LBB0_121 # %bb.122: # %vec.epilog.middle.block762 # in Loop: Header=BB0_112 Depth=1 - move $t2, $t8 + move $s6, $t8 beq $t8, $a1, .LBB0_111 .LBB0_123: # %vec.epilog.scalar.ph749.preheader # in Loop: Header=BB0_112 Depth=1 - slli.d $a5, $t2, 2 - sub.d $t2, $a1, $t2 + slli.d $t2, $s6, 2 + sub.d $s6, $a1, $s6 .p2align 4, , 16 .LBB0_124: # %vec.epilog.scalar.ph749 # Parent Loop BB0_112 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $fa0, $t3, $a5 - fldx.s $fa1, $a2, $a5 - fldx.s $fa2, $t1, $a5 + fldx.s $fa0, $t3, $t2 + fldx.s $fa1, $a5, $t2 + fldx.s $fa2, $t1, $t2 fmul.s $fa1, $fa1, $fa1 fmsub.s $fa0, $fa2, $fa0, $fa1 - fstx.s $fa0, $s5, $a5 - addi.d $t2, $t2, -1 - addi.d $a5, $a5, 4 - bnez $t2, .LBB0_124 + fstx.s $fa0, $s5, $t2 + addi.d $s6, $s6, -1 + addi.d $t2, $t2, 4 + bnez $s6, .LBB0_124 b .LBB0_111 .LBB0_125: # %.preheader.us.preheader move $t1, $zero - ld.d $a2, $sp, 24 # 8-byte Folded Reload - sub.d $a0, $a2, $a4 - sub.d $a5, $a2, $a3 - sltui $a0, $a0, 64 + sub.d $a5, $a3, $a4 + sub.d $t2, $a3, $a2 sltui $a5, $a5, 64 - or $t2, $a0, $a5 + sltui $t2, $t2, 64 + or $t2, $a5, $t2 slli.d $t0, $t0, 4 slli.d $t3, $t4, 2 addi.d $t4, $a4, 32 - addi.d $t5, $a2, 32 - addi.d $t6, $a3, 32 + addi.d $t5, $a3, 32 + addi.d $t6, $a2, 32 sub.d $t7, $zero, $t3 
- lu12i.w $a0, -273859 - ori $a0, $a0, 1802 - vreplgr2vr.w $vr0, $a0 - lu12i.w $a0, 250429 - ori $a0, $a0, 1802 - xvreplgr2vr.w $xr1, $a0 + lu12i.w $a5, -273859 + ori $a5, $a5, 1802 + vreplgr2vr.w $vr0, $a5 + lu12i.w $a5, 250429 + ori $a5, $a5, 1802 + xvreplgr2vr.w $xr1, $a5 ori $t8, $zero, 16 b .LBB0_127 .p2align 4, , 16 .LBB0_126: # %._crit_edge.us350 # in Loop: Header=BB0_127 Depth=1 addi.d $t1, $t1, 1 - lu12i.w $a0, 2 add.d $t4, $t4, $a0 add.d $t5, $t5, $a0 add.d $t6, $t6, $a0 - add.d $a2, $a2, $a0 add.d $a3, $a3, $a0 + add.d $a2, $a2, $a0 add.d $a4, $a4, $a0 - beq $t1, $ra, .LBB0_140 + ld.d $a5, $sp, 48 # 8-byte Folded Reload + beq $t1, $a5, .LBB0_140 .LBB0_127: # %iter.check772 # =>This Loop Header: Depth=1 # Child Loop BB0_132 Depth 2 # Child Loop BB0_136 Depth 2 # Child Loop BB0_139 Depth 2 - or $a0, $a7, $t2 - beqz $a0, .LBB0_129 + or $a5, $a7, $t2 + beqz $a5, .LBB0_129 # %bb.128: # in Loop: Header=BB0_127 Depth=1 - move $a5, $zero + move $fp, $zero b .LBB0_138 .p2align 4, , 16 .LBB0_129: # %vector.main.loop.iter.check774 # in Loop: Header=BB0_127 Depth=1 bgeu $a1, $t8, .LBB0_131 # %bb.130: # in Loop: Header=BB0_127 Depth=1 - move $s0, $zero + move $s1, $zero b .LBB0_135 .p2align 4, , 16 .LBB0_131: # %vector.body778.preheader # in Loop: Header=BB0_127 Depth=1 - move $a0, $t6 - move $a5, $t5 - move $fp, $t4 - move $s0, $t0 + move $a5, $t6 + move $fp, $t5 + move $s0, $t4 + move $s1, $t0 .p2align 4, , 16 .LBB0_132: # %vector.body778 # Parent Loop BB0_127 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr2, $a0, -32 - xvld $xr3, $a0, 0 - xvld $xr4, $fp, -32 - xvld $xr5, $fp, 0 + xvld $xr2, $a5, -32 + xvld $xr3, $a5, 0 + xvld $xr4, $s0, -32 + xvld $xr5, $s0, 0 xvfmul.s $xr2, $xr2, $xr2 xvfmul.s $xr3, $xr3, $xr3 xvfnmsub.s $xr2, $xr2, $xr1, $xr4 xvfnmsub.s $xr3, $xr3, $xr1, $xr5 - xvst $xr2, $a5, -32 - xvst $xr3, $a5, 0 - addi.d $s0, $s0, -16 + xvst $xr2, $fp, -32 + xvst $xr3, $fp, 0 + addi.d $s1, $s1, -16 + addi.d $s0, $s0, 64 addi.d $fp, $fp, 64 addi.d $a5, $a5, 64 - addi.d $a0, $a0, 64 - bnez $s0, .LBB0_132 + bnez $s1, .LBB0_132 # %bb.133: # %middle.block785 # in Loop: Header=BB0_127 Depth=1 beq $t0, $a1, .LBB0_126 # %bb.134: # %vec.epilog.iter.check790 # in Loop: Header=BB0_127 Depth=1 - move $s0, $t0 - move $a5, $t0 + move $s1, $t0 + move $fp, $t0 beqz $a6, .LBB0_138 .LBB0_135: # %vec.epilog.ph789 # in Loop: Header=BB0_127 Depth=1 - add.d $a0, $t7, $s0 - alsl.d $a5, $s0, $a2, 2 - alsl.d $fp, $s0, $a3, 2 - alsl.d $s0, $s0, $a4, 2 + add.d $a5, $t7, $s1 + alsl.d $fp, $s1, $a3, 2 + alsl.d $s0, $s1, $a2, 2 + alsl.d $s1, $s1, $a4, 2 .p2align 4, , 16 .LBB0_136: # %vec.epilog.vector.body795 # Parent Loop BB0_127 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $fp, 0 - vld $vr3, $s0, 0 + vld $vr2, $s0, 0 + vld $vr3, $s1, 0 vfmul.s $vr2, $vr2, $vr2 vfmadd.s $vr2, $vr2, $vr0, $vr3 - vst $vr2, $a5, 0 - addi.d $a0, $a0, 4 - addi.d $a5, $a5, 16 + vst $vr2, $fp, 0 + addi.d $a5, $a5, 4 addi.d $fp, $fp, 16 addi.d $s0, $s0, 16 - bnez $a0, .LBB0_136 + addi.d $s1, $s1, 16 + bnez $a5, .LBB0_136 # %bb.137: # %vec.epilog.middle.block800 # in Loop: Header=BB0_127 Depth=1 - move $a5, $t3 + move $fp, $t3 beq $t3, $a1, .LBB0_126 .LBB0_138: # %vec.epilog.scalar.ph788.preheader # in Loop: Header=BB0_127 Depth=1 - slli.d $a0, $a5, 2 - sub.d $a5, $a1, $a5 + slli.d $a5, $fp, 2 + sub.d $fp, $a1, $fp .p2align 4, , 16 .LBB0_139: # %vec.epilog.scalar.ph788 # Parent Loop BB0_127 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $fa2, $a4, $a0 - fldx.s $fa3, $a3, $a0 - pcalau12i $fp, 
%pc_hi20(.LCPI0_2) - fld.s $fa4, $fp, %pc_lo12(.LCPI0_2) + fldx.s $fa2, $a4, $a5 + fldx.s $fa3, $a2, $a5 + pcalau12i $s0, %pc_hi20(.LCPI0_2) + fld.s $fa4, $s0, %pc_lo12(.LCPI0_2) fmul.s $fa3, $fa3, $fa3 fmadd.s $fa2, $fa3, $fa4, $fa2 - fstx.s $fa2, $a2, $a0 - addi.d $a5, $a5, -1 - addi.d $a0, $a0, 4 - bnez $a5, .LBB0_139 + fstx.s $fa2, $a3, $a5 + addi.d $fp, $fp, -1 + addi.d $a5, $a5, 4 + bnez $fp, .LBB0_139 b .LBB0_126 -.LBB0_140: +.LBB0_140: # %._crit_edge349 ld.d $s8, $sp, 56 # 8-byte Folded Reload ld.d $s7, $sp, 64 # 8-byte Folded Reload ld.d $s6, $sp, 72 # 8-byte Folded Reload @@ -1368,7 +1370,6 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha ld.d $fp, $sp, 128 # 8-byte Folded Reload ld.d $ra, $sp, 136 # 8-byte Folded Reload addi.d $sp, $sp, 144 -.LBB0_141: # %._crit_edge349 ret .Lfunc_end0: .size _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_, .Lfunc_end0-_Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s index 67ab2169..113f5060 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s @@ -4779,22 +4779,22 @@ MT_decrypt: # @MT_decrypt lu12i.w $a6, 524287 ori $t4, $a6, 4094 xvreplgr2vr.w $xr0, $t4 - lu12i.w $a6, -524288 - xvreplgr2vr.w $xr1, $a6 + xvldi $xr1, -3200 xvrepli.w $xr2, 1 - lu12i.w $a7, -421749 - ori $a7, $a7, 223 - xvreplgr2vr.w $xr3, $a7 - ori $t0, $zero, 896 + lu12i.w $a6, -421749 + ori $a6, $a6, 223 + xvreplgr2vr.w $xr3, $a6 + ori $a7, $zero, 896 + lu12i.w $t0, -524288 + lu32i.d $t0, 0 ori $t1, $zero, 2484 ori $t2, $zero, 2488 ori $t3, $zero, 2492 vreplgr2vr.w $vr4, $t4 - vreplgr2vr.w $vr5, $a6 - lu32i.d $a6, 0 + vldi $vr5, -3200 vrepli.w $vr6, 1 - vreplgr2vr.w $vr7, $a7 - lu32i.d $a7, 0 + vreplgr2vr.w $vr7, $a6 + lu32i.d $a6, 0 ori $t4, $zero, 1584 lu12i.w $t5, -404795 ori $t5, $t5, 1664 @@ -4874,43 +4874,43 @@ MT_decrypt: # @MT_decrypt xvxor.v $xr9, $xr10, $xr9 xvstx $xr9, $t7, $a3 addi.d $t7, $t7, 32 - bne $t7, $t0, .LBB1_8 + bne $t7, $a7, .LBB1_8 # %bb.9: # %vector.ph # in Loop: Header=BB1_6 Depth=1 ld.wu $t7, $sp, 916 move $a2, $zero xvpickve2gr.w $t8, $xr8, 7 - and $t8, $t8, $a6 + and $t8, $t8, $t0 srli.d $fp, $t7, 1 bstrins.d $t8, $fp, 30, 1 srli.d $t8, $t8, 1 ldx.w $fp, $t1, $a3 andi $s0, $t7, 1 sub.d $s0, $zero, $s0 - and $s0, $s0, $a7 + and $s0, $s0, $a6 xor $fp, $s0, $fp ld.wu $s0, $sp, 920 xor $t8, $fp, $t8 st.w $t8, $sp, 912 - and $t7, $t7, $a6 + and $t7, $t7, $t0 srli.d $t8, $s0, 1 bstrins.d $t7, $t8, 30, 1 srli.d $t7, $t7, 1 ldx.w $t8, $t2, $a3 andi $fp, $s0, 1 sub.d $fp, $zero, $fp - and $fp, $fp, $a7 + and $fp, $fp, $a6 xor $t8, $fp, $t8 ld.wu $fp, $sp, 924 xor $t7, $t8, $t7 st.w $t7, $sp, 916 - and $t7, $s0, $a6 + and $t7, $s0, $t0 srli.d $t8, $fp, 1 bstrins.d $t7, $t8, 30, 1 ldx.w $t8, $t3, $a3 andi $s0, $fp, 1 sub.d $s0, $zero, $s0 - and $s0, $s0, $a7 + and $s0, $s0, $a6 xor $t8, $s0, $t8 srli.d $t7, $t7, 1 xor $t7, $t8, $t7 @@ -4944,14 +4944,14 @@ MT_decrypt: # @MT_decrypt addi.d $t7, $sp, 16 ldx.wu $t8, $t3, $t7 ld.wu $a2, $sp, 16 - and $t8, $t8, $a6 + and $t8, $t8, $t0 srli.d $fp, $a2, 1 bstrins.d $t8, $fp, 30, 1 srli.d $t8, $t8, 1 ld.w $fp, $sp, 1600 andi $s0, $a2, 1 sub.d $s0, $zero, $s0 - and $s0, $s0, $a7 + and $s0, $s0, $a6 xor $fp, $s0, $fp xor $t8, $fp, $t8 ori 
$fp, $zero, 2508 diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mew.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mew.s index f83ce41c..756b9348 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mew.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mew.s @@ -83,8 +83,7 @@ mew_lzma: # @mew_lzma st.d $a1, $sp, 176 # 8-byte Folded Spill lu12i.w $a6, -7 ori $t0, $a6, 448 - ori $a1, $zero, 1024 - xvreplgr2vr.h $xr0, $a1 + xvldi $xr0, -2812 ori $s4, $a5, 3648 lu12i.w $a1, 16384 ori $t1, $a1, 1024 diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mspack.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mspack.s index c43884df..8db83646 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mspack.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_mspack.s @@ -12746,9 +12746,7 @@ mszip_make_decode_table: # @mszip_make_decode_table st.d $a0, $sp, 16 # 8-byte Folded Spill bstrpick.d $a4, $a0, 15, 0 addi.w $t2, $zero, -1 - ori $t3, $zero, 0 - lu32i.d $t3, -1 - vreplgr2vr.d $vr0, $t3 + vldi $vr0, -1552 ori $t3, $zero, 14 ori $t7, $zero, 1 st.d $t4, $sp, 8 # 8-byte Folded Spill diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_nsis_LZMADecode.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_nsis_LZMADecode.s index 018a7c33..ce112aa7 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_nsis_LZMADecode.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_nsis_LZMADecode.s @@ -611,8 +611,7 @@ lzmaDecode: # @lzmaDecode # Parent Loop BB1_3 Depth=2 # Parent Loop BB1_4 Depth=3 # => This Inner Loop Header: Depth=4 - ori $a4, $zero, 1024 - xvreplgr2vr.h $xr0, $a4 + xvldi $xr0, -2812 xvstx $xr0, $a3, $a1 addi.d $a1, $a1, -32 add.d $a4, $a2, $a1 diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_rebuildpe.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_rebuildpe.s index 1b14b710..37579adc 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_rebuildpe.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_rebuildpe.s @@ -76,8 +76,7 @@ cli_rebuildpe: # @cli_rebuildpe addi.d $a2, $a1, 300 xvrepli.b $xr0, 0 xvrepli.w $xr1, 511 - ori $t0, $zero, 512 - xvreplgr2vr.w $xr2, $t0 + xvldi $xr2, -3838 xvrepli.w $xr3, -512 move $t0, $a0 xvori.b $xr4, $xr0, 0 @@ -154,8 +153,7 @@ cli_rebuildpe: # @cli_rebuildpe add.d $t0, $t0, $a1 addi.d $t0, $t0, 84 vrepli.w $vr1, 511 - ori $t1, $zero, 512 - vreplgr2vr.w $vr2, $t1 + vldi $vr2, -3838 vrepli.w $vr3, -512 .p2align 4, , 16 .LBB0_14: # %vec.epilog.vector.body diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_unsp.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_unsp.s index 6f284da8..63c719ed 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_unsp.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_unsp.s @@ -223,8 +223,7 @@ very_real_unpack: # @very_real_unpack ori $a3, $zero, 3660 alsl.d $a0, $a0, $a3, 1 add.d $a3, $s7, $a0 - ori $a4, $zero, 1024 - xvreplgr2vr.h $xr0, $a4 + xvldi $xr0, -2812 addi.w $a4, $zero, -20 .p2align 4, , 16 
.LBB1_3: # %vector.body diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_upack.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_upack.s index 5ad93372..feebf2be 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_upack.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_upack.s @@ -486,8 +486,7 @@ unupack: # @unupack addi.d $a5, $a5, 256 add.d $a4, $a4, $a0 addi.d $a4, $a4, 60 - ori $a6, $zero, 1024 - xvreplgr2vr.w $xr0, $a6 + xvldi $xr0, -3836 .LBB0_71: # %vector.body1225 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a4, -32 @@ -1062,8 +1061,7 @@ unupack: # @unupack addi.d $a4, $a4, 256 ld.d $a5, $sp, 88 # 8-byte Folded Reload addi.d $a5, $a5, 56 - ori $a6, $zero, 1024 - xvreplgr2vr.w $xr0, $a6 + xvldi $xr0, -3836 .LBB0_145: # %vector.body # =>This Inner Loop Header: Depth=1 xvst $xr0, $a5, -32 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/fmo.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/fmo.s index 885b389a..8bcf5f87 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/fmo.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/fmo.s @@ -974,20 +974,19 @@ FmoGetLastCodedMBOfSliceGroup: # @FmoGetLastCodedMBOfSliceGroup vreplgr2vr.w $vr0, $a0 pcalau12i $a4, %pc_hi20(.LCPI8_0) vld $vr1, $a4, %pc_lo12(.LCPI8_0) - lu12i.w $a4, -524288 - vreplgr2vr.w $vr2, $a4 - addi.d $a5, $a3, 4 + addi.d $a4, $a3, 4 + vldi $vr2, -3200 vrepli.b $vr3, 0 - move $a6, $a1 - vori.b $vr4, $vr2, 0 + move $a5, $a1 + vldi $vr4, -3200 .p2align 4, , 16 .LBB8_5: # %vector.body # =>This Inner Loop Header: Depth=1 - ld.w $a7, $a5, -4 - ld.w $t0, $a5, 0 - vinsgr2vr.w $vr5, $a7, 0 + ld.w $a6, $a4, -4 + ld.w $a7, $a4, 0 + vinsgr2vr.w $vr5, $a6, 0 vaddi.wu $vr6, $vr1, 4 - vinsgr2vr.w $vr7, $t0, 0 + vinsgr2vr.w $vr7, $a7, 0 vilvl.b $vr5, $vr3, $vr5 vilvl.h $vr5, $vr3, $vr5 vilvl.b $vr7, $vr3, $vr7 @@ -997,22 +996,23 @@ FmoGetLastCodedMBOfSliceGroup: # @FmoGetLastCodedMBOfSliceGroup vbitsel.v $vr2, $vr2, $vr1, $vr5 vbitsel.v $vr4, $vr4, $vr6, $vr7 vaddi.wu $vr1, $vr1, 8 - addi.d $a6, $a6, -8 - addi.d $a5, $a5, 8 - bnez $a6, .LBB8_5 + addi.d $a5, $a5, -8 + addi.d $a4, $a4, 8 + bnez $a5, .LBB8_5 # %bb.6: # %middle.block vmax.w $vr0, $vr2, $vr4 vbsrl.v $vr1, $vr0, 8 vmax.w $vr0, $vr1, $vr0 vbsrl.v $vr1, $vr0, 4 vmax.w $vr0, $vr1, $vr0 - vpickve2gr.w $a5, $vr0, 0 - xor $a4, $a5, $a4 - sltui $a4, $a4, 1 - masknez $a5, $a5, $a4 + vpickve2gr.w $a4, $vr0, 0 + lu12i.w $a5, -524288 + xor $a5, $a4, $a5 + sltui $a5, $a5, 1 + masknez $a4, $a4, $a5 addi.d $a6, $zero, -1 - maskeqz $a4, $a6, $a4 - or $a4, $a4, $a5 + maskeqz $a5, $a6, $a5 + or $a4, $a5, $a4 beq $a1, $a2, .LBB8_9 .LBB8_7: # %scalar.ph.preheader add.d $a3, $a3, $a1 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/img_luma.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/img_luma.s index ca052076..f3239c29 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/img_luma.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/img_luma.s @@ -871,8 +871,7 @@ getVerSubImageSixTap: # @getVerSubImageSixTap slli.d $s0, $t3, 3 xvrepli.w $xr1, 20 xvrepli.w $xr2, -5 - ori $a6, $zero, 512 - xvreplgr2vr.w $xr3, $a6 + xvldi $xr3, -3838 move $a6, $t4 move $a7, $t5 move $s1, $t6 @@ -972,8 +971,7 @@ getVerSubImageSixTap: # @getVerSubImageSixTap slli.d $s0, $t3, 3 xvrepli.w $xr1, 20 
xvrepli.w $xr2, -5 - ori $a6, $zero, 512 - xvreplgr2vr.w $xr3, $a6 + xvldi $xr3, -3838 move $s1, $t5 move $s2, $t6 move $s3, $t4 @@ -1088,8 +1086,7 @@ getVerSubImageSixTap: # @getVerSubImageSixTap ori $t5, $zero, 8 xvrepli.w $xr1, 20 xvrepli.w $xr2, -5 - ori $a1, $zero, 512 - xvreplgr2vr.w $xr3, $a1 + xvldi $xr3, -3838 b .LBB2_32 .p2align 4, , 16 .LBB2_31: # %..loopexit235_crit_edge.us @@ -1283,8 +1280,7 @@ getVerSubImageSixTap: # @getVerSubImageSixTap xvreplgr2vr.w $xr0, $t3 xvrepli.w $xr1, 20 xvrepli.w $xr2, -5 - ori $a1, $zero, 512 - xvreplgr2vr.w $xr3, $a1 + xvldi $xr3, -3838 move $s2, $t5 move $s3, $t6 move $s4, $t7 @@ -2085,8 +2081,7 @@ getVerSubImageSixTap: # @getVerSubImageSixTap xvreplgr2vr.w $xr0, $t3 xvrepli.w $xr1, 20 xvrepli.w $xr2, -5 - ori $a1, $zero, 512 - xvreplgr2vr.w $xr3, $a1 + xvldi $xr3, -3838 move $s2, $t5 move $s3, $t6 move $s4, $t7 @@ -2216,8 +2211,7 @@ getVerSubImageSixTap: # @getVerSubImageSixTap xvreplgr2vr.w $xr0, $a1 xvrepli.w $xr1, 20 xvrepli.w $xr2, -5 - ori $t0, $zero, 512 - xvreplgr2vr.w $xr3, $t0 + xvldi $xr3, -3838 move $t0, $a6 move $t1, $a5 move $t4, $a7 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_epzs.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_epzs.s index a54bdbd2..1ed2c41b 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_epzs.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_epzs.s @@ -1535,7 +1535,7 @@ EPZSSliceInit: # @EPZSSliceInit vrepli.w $vr1, -128 vrepli.w $vr2, 127 vrepli.w $vr3, 32 - vreplgr2vr.w $vr4, $s1 + vldi $vr4, -2553 vrepli.b $vr5, 0 vrepli.h $vr6, 1 b .LBB8_3 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s index c928165d..d557f479 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s @@ -3022,8 +3022,7 @@ rc_init_GOP: # @rc_init_GOP bstrpick.d $a7, $a5, 30, 2 slli.d $a7, $a7, 2 xvreplve0.d $xr1, $xr0 - lu52i.d $t2, $zero, 1022 - xvreplgr2vr.d $xr2, $t2 + xvldi $xr2, -928 move $t2, $a7 .p2align 4, , 16 .LBB8_26: # %vector.body diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s index 51a47e52..e0259af6 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s @@ -2117,7 +2117,7 @@ test_wp_B_slice: # @test_wp_B_slice vrepli.b $vr3, 0 vrepli.h $vr4, 1 vrepli.w $vr5, 32 - vreplgr2vr.w $vr6, $t6 + vldi $vr6, -2557 vrepli.w $vr7, -129 vrepli.w $vr8, -193 vrepli.w $vr9, 64 diff --git a/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/align.s b/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/align.s index 277e7c6a..e0f08e11 100644 --- a/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/align.s +++ b/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/align.s @@ -1370,67 +1370,67 @@ align_get_dist: # @align_get_dist move $s7, $a0 blt $s6, $s0, .LBB1_15 # %bb.3: # %iter.check - ori $a1, $zero, 3 - lu12i.w $a0, -524288 - move $a3, $s0 - bltu $fp, $a1, .LBB1_13 + ori $a0, $zero, 3 + move $a2, $s0 + bltu $fp, $a0, .LBB1_13 # %bb.4: # 
%vector.main.loop.iter.check - bstrpick.d $a1, $fp, 31, 0 - ori $a2, $zero, 15 - addi.d $a1, $a1, 1 - bgeu $fp, $a2, .LBB1_6 + bstrpick.d $a0, $fp, 31, 0 + ori $a1, $zero, 15 + addi.d $a0, $a0, 1 + bgeu $fp, $a1, .LBB1_6 # %bb.5: - move $a2, $zero + move $a1, $zero b .LBB1_10 .LBB1_6: # %vector.ph - bstrpick.d $a2, $a1, 32, 4 - slli.d $a2, $a2, 4 - addi.d $a3, $a6, 32 - xvreplgr2vr.w $xr0, $a0 - move $a4, $a2 + bstrpick.d $a1, $a0, 32, 4 + slli.d $a1, $a1, 4 + addi.d $a2, $a6, 32 + xvldi $xr0, -3200 + move $a3, $a1 .p2align 4, , 16 .LBB1_7: # %vector.body # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB1_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB1_7 # %bb.8: # %middle.block - beq $a1, $a2, .LBB1_15 + beq $a0, $a1, .LBB1_15 # %bb.9: # %vec.epilog.iter.check - andi $a3, $a1, 12 - beqz $a3, .LBB1_68 + andi $a2, $a0, 12 + beqz $a2, .LBB1_68 .LBB1_10: # %vec.epilog.ph - bstrpick.d $a3, $a1, 32, 2 - slli.d $a4, $a3, 2 - alsl.d $a3, $a3, $s0, 2 - alsl.d $a5, $a2, $a6, 2 - sub.d $a2, $a2, $a4 - vreplgr2vr.w $vr0, $a0 + bstrpick.d $a2, $a0, 32, 2 + slli.d $a3, $a2, 2 + alsl.d $a2, $a2, $s0, 2 + alsl.d $a4, $a1, $a6, 2 + sub.d $a1, $a1, $a3 + vldi $vr0, -3200 .p2align 4, , 16 .LBB1_11: # %vec.epilog.vector.body # =>This Inner Loop Header: Depth=1 - vst $vr0, $a5, 0 - addi.d $a2, $a2, 4 - addi.d $a5, $a5, 16 - bnez $a2, .LBB1_11 + vst $vr0, $a4, 0 + addi.d $a1, $a1, 4 + addi.d $a4, $a4, 16 + bnez $a1, .LBB1_11 # %bb.12: # %vec.epilog.middle.block - beq $a1, $a4, .LBB1_15 + beq $a0, $a3, .LBB1_15 .LBB1_13: # %.lr.ph.preheader - slli.d $a1, $a3, 2 - sub.d $a1, $a1, $t1 - add.d $a1, $a6, $a1 - sub.d $a2, $s6, $a3 - addi.d $a2, $a2, 1 - lu32i.d $a0, 0 + slli.d $a0, $a2, 2 + sub.d $a0, $a0, $t1 + add.d $a0, $a6, $a0 + sub.d $a1, $s6, $a2 + addi.d $a1, $a1, 1 + lu12i.w $a2, -524288 + lu32i.d $a2, 0 .p2align 4, , 16 .LBB1_14: # %.lr.ph # =>This Inner Loop Header: Depth=1 - st.w $a0, $a1, 0 - addi.w $a2, $a2, -1 - addi.d $a1, $a1, 4 - bnez $a2, .LBB1_14 + st.w $a2, $a0, 0 + addi.w $a1, $a1, -1 + addi.d $a0, $a0, 4 + bnez $a1, .LBB1_14 .LBB1_15: # %._crit_edge sub.d $a0, $a6, $t1 ld.d $t8, $sp, 112 # 8-byte Folded Reload @@ -1776,7 +1776,7 @@ align_get_dist: # @align_get_dist addi.d $sp, $sp, 208 ret .LBB1_68: - add.d $a3, $a2, $s0 + add.d $a2, $a1, $s0 b .LBB1_13 .Lfunc_end1: .size align_get_dist, .Lfunc_end1-align_get_dist diff --git a/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/negamax.s b/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/negamax.s index 7d099ad2..38697c06 100644 --- a/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/negamax.s +++ b/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/negamax.s @@ -67,9 +67,7 @@ search_for_move: # @search_for_move slli.d $a1, $a1, 4 xvrepli.b $xr0, 0 xvrepli.b $xr1, -1 - lu12i.w $a3, 15 - ori $a3, $a3, 4095 - xvreplgr2vr.w $xr2, $a3 + xvldi $xr2, -2305 pcalau12i $a3, %got_pc_hi20(countbits16) ld.d $a3, $a3, %got_pc_lo12(countbits16) move $a4, $a1 diff --git a/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/traits.s b/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/traits.s index f7303b73..f1918a19 100644 --- a/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/traits.s +++ b/results/MultiSource/Applications/obsequi/CMakeFiles/Obsequi.dir/traits.s @@ -77,9 +77,7 @@ write_node_info: # @write_node_info 
addi.d $a3, $a3, 40 xvrepli.b $xr1, 0 xvrepli.b $xr2, -1 - lu12i.w $a4, 15 - ori $a4, $a4, 4095 - xvreplgr2vr.w $xr3, $a4 + xvldi $xr3, -2305 pcalau12i $a4, %got_pc_hi20(countbits16) ld.d $a4, $a4, %got_pc_lo12(countbits16) move $a5, $a2 @@ -385,9 +383,7 @@ write_node_info: # @write_node_info addi.d $a4, $a4, 40 xvrepli.b $xr1, 0 xvrepli.b $xr2, -1 - lu12i.w $a5, 15 - ori $a5, $a5, 4095 - xvreplgr2vr.w $xr3, $a5 + xvldi $xr3, -2305 pcalau12i $a5, %got_pc_hi20(countbits16) ld.d $a5, $a5, %got_pc_lo12(countbits16) move $a6, $a3 @@ -726,9 +722,7 @@ write_node_info: # @write_node_info slli.d $a1, $a1, 4 xvrepli.b $xr0, 0 xvrepli.b $xr1, -1 - lu12i.w $a3, 15 - ori $a3, $a3, 4095 - xvreplgr2vr.w $xr2, $a3 + xvldi $xr2, -2305 pcalau12i $a3, %got_pc_hi20(countbits16) ld.d $a3, $a3, %got_pc_lo12(countbits16) move $a4, $a1 @@ -1053,9 +1047,7 @@ write_node_info: # @write_node_info add.d $a3, $s3, $a0 addi.d $a3, $a3, 36 xvrepli.b $xr1, 0 - lu12i.w $a4, 15 - ori $a4, $a4, 4095 - xvreplgr2vr.w $xr2, $a4 + xvldi $xr2, -2305 pcalau12i $a4, %got_pc_hi20(countbits16) ld.d $a4, $a4, %got_pc_lo12(countbits16) move $a5, $a2 @@ -1344,9 +1336,7 @@ write_node_info: # @write_node_info add.d $a2, $s3, $a2 addi.d $a2, $a2, 36 xvrepli.b $xr0, 0 - lu12i.w $a3, 15 - ori $a3, $a3, 4095 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -2305 pcalau12i $a3, %got_pc_hi20(countbits16) ld.d $a3, $a3, %got_pc_lo12(countbits16) move $a4, $a1 @@ -1727,9 +1717,7 @@ tr_non_safe_moves_a_little_touchy: # @tr_non_safe_moves_a_little_touchy add.d $a5, $a2, $a3 addi.d $a5, $a5, 40 xvrepli.b $xr1, 0 - lu12i.w $a6, 15 - ori $a6, $a6, 4095 - xvreplgr2vr.w $xr2, $a6 + xvldi $xr2, -2305 pcalau12i $a6, %got_pc_hi20(countbits16) ld.d $a6, $a6, %got_pc_lo12(countbits16) move $a7, $a4 diff --git a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s index e67ffbd7..f53d4a84 100644 --- a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s +++ b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s @@ -25907,10 +25907,9 @@ ov_crosslap: # @ov_crosslap sub.d $t3, $zero, $t1 ori $t4, $zero, 4 vldi $vr0, -1168 - lu12i.w $t5, 260096 - vreplgr2vr.w $vr1, $t5 - xvreplgr2vr.w $xr2, $t5 ori $t5, $zero, 16 + vldi $vr1, -1424 + xvldi $xr2, -1424 b .LBB132_48 .p2align 4, , 16 .LBB132_47: # %._crit_edge.us.i @@ -27447,10 +27446,9 @@ _ov_64_seek_lap: # @_ov_64_seek_lap sub.d $t3, $zero, $t1 ori $t4, $zero, 4 vldi $vr0, -1168 - lu12i.w $t5, 260096 - vreplgr2vr.w $vr1, $t5 - xvreplgr2vr.w $xr2, $t5 ori $t5, $zero, 16 + vldi $vr1, -1424 + xvldi $xr2, -1424 b .LBB138_45 .p2align 4, , 16 .LBB138_44: # %._crit_edge.us.i @@ -28016,10 +28014,9 @@ _ov_d_seek_lap: # @_ov_d_seek_lap sub.d $t3, $zero, $t1 ori $t4, $zero, 4 vldi $vr0, -1168 - lu12i.w $t5, 260096 - vreplgr2vr.w $vr1, $t5 - xvreplgr2vr.w $xr2, $t5 ori $t5, $zero, 16 + vldi $vr1, -1424 + xvldi $xr2, -1424 b .LBB142_45 .p2align 4, , 16 .LBB142_44: # %._crit_edge.us.i @@ -65673,10 +65670,8 @@ floor1_interpolate_fit: # @floor1_interpolate_fit xvreplgr2vr.w $xr1, $a4 add.d $a7, $s3, $fp addi.d $a7, $a7, 16 - lu12i.w $t0, 7 - ori $t0, $t0, 4095 - xvreplgr2vr.w $xr2, $t0 - xvreplgr2vr.w $xr3, $a5 + xvldi $xr2, -2433 + xvldi $xr3, -3712 xvrepli.b $xr4, -1 move $t0, $a6 move $t1, $a3 @@ -65978,15 +65973,13 @@ floor1_encode: # @floor1_encode vseqi.w $vr1, $vr3, 3 vseqi.w $vr2, $vr3, 2 vseqi.w $vr3, $vr3, 1 - lu12i.w $a4, 7 - ori $a4, $a4, 4095 - vreplgr2vr.w $vr4, $a4 + vldi $vr4, -2433 lu12i.w $a4, 
349525 ori $a4, $a4, 1366 lu32i.d $a4, 349525 lu52i.d $a4, $a4, 341 vrepli.b $vr5, 0 - vreplgr2vr.w $vr6, $a0 + vldi $vr6, -3712 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 @@ -73762,10 +73755,8 @@ mapping0_forward: # @mapping0_forward ori $a0, $a0, 2341 st.d $a0, $fp, -208 # 8-byte Folded Spill lu12i.w $s3, 8 - lu12i.w $a0, 7 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr7, $a0 - xvreplgr2vr.w $xr8, $s3 + xvldi $xr7, -2433 + xvldi $xr8, -3712 xvrepli.b $xr9, -1 xvst $xr6, $fp, -528 # 32-byte Folded Spill xvst $xr7, $fp, -288 # 32-byte Folded Spill @@ -81801,8 +81792,7 @@ dradb3: # @dradb3 addi.d $s5, $t8, 8 addi.d $s6, $t7, 8 ori $s7, $zero, 48 - lu12i.w $a6, -266240 - xvreplgr2vr.w $xr0, $a6 + xvldi $xr0, -3137 lu12i.w $a6, 259547 ori $a6, $a6, 983 xvreplgr2vr.w $xr1, $a6 @@ -82041,11 +82031,9 @@ dradb3: # @dradb3 addi.d $a6, $a6, 1 or $a1, $a1, $a3 st.d $a1, $sp, 88 # 8-byte Folded Spill - lu12i.w $a1, 258048 - xvreplgr2vr.w $xr0, $a1 lu12i.w $a1, 259547 ori $a1, $a1, 983 - xvreplgr2vr.w $xr1, $a1 + xvreplgr2vr.w $xr0, $a1 st.d $a6, $sp, 48 # 8-byte Folded Spill bstrpick.d $a1, $a6, 62, 3 slli.d $a3, $a1, 3 @@ -82064,7 +82052,8 @@ dradb3: # @dradb3 st.d $a3, $sp, 24 # 8-byte Folded Spill addi.d $a2, $a2, -12 st.d $a2, $sp, 424 # 8-byte Folded Spill - vldi $vr2, -1184 + vldi $vr1, -1184 + xvldi $xr2, -3265 xvrepli.d $xr3, -2 xvrepli.b $xr4, -1 slli.d $a2, $a0, 2 @@ -82131,11 +82120,11 @@ dradb3: # @dradb3 fst.s $fa6, $t6, -4 fld.s $fa6, $t5, 4 fld.s $ft0, $t4, 4 - fmul.s $fa5, $fa5, $fa2 + fmul.s $fa5, $fa5, $fa1 fldx.s $ft1, $t3, $a1 fsub.s $fa5, $fa7, $fa5 fsub.s $fa6, $fa6, $ft0 - fmul.s $fa7, $fa6, $fa2 + fmul.s $fa7, $fa6, $fa1 fadd.s $fa6, $ft1, $fa6 fstx.s $fa6, $a2, $a1 fldx.s $fa6, $t1, $a1 @@ -82773,7 +82762,7 @@ dradb3: # @dradb3 xvinsve0.w $xr9, $xr13, 5 xvinsve0.w $xr9, $xr14, 6 xvinsve0.w $xr9, $xr15, 7 - xvfmul.s $xr7, $xr8, $xr0 + xvfmul.s $xr7, $xr8, $xr2 xvfsub.s $xr7, $xr9, $xr7 xvfadd.s $xr8, $xr9, $xr8 xvstelm.w $xr8, $a2, -28, 0 @@ -82830,7 +82819,7 @@ dradb3: # @dradb3 xvinsve0.w $xr10, $xr14, 5 xvinsve0.w $xr10, $xr15, 6 xvinsve0.w $xr10, $xr16, 7 - xvfmul.s $xr8, $xr9, $xr0 + xvfmul.s $xr8, $xr9, $xr2 xvfsub.s $xr8, $xr10, $xr8 xvfadd.s $xr9, $xr10, $xr9 xvstelm.w $xr9, $a2, -24, 0 @@ -82872,7 +82861,7 @@ dradb3: # @dradb3 xvinsve0.w $xr10, $xr16, 6 xvinsve0.w $xr10, $xr17, 7 xvfsub.s $xr9, $xr9, $xr10 - xvfmul.s $xr11, $xr9, $xr1 + xvfmul.s $xr11, $xr9, $xr0 fld.s $ft1, $s4, -24 fld.s $ft2, $s4, -16 fld.s $ft4, $s4, -8 @@ -82904,7 +82893,7 @@ dradb3: # @dradb3 xvinsve0.w $xr10, $xr17, 6 xvinsve0.w $xr10, $xr18, 7 xvfadd.s $xr9, $xr9, $xr10 - xvfmul.s $xr10, $xr9, $xr1 + xvfmul.s $xr10, $xr9, $xr0 xvfsub.s $xr9, $xr7, $xr10 xvfadd.s $xr7, $xr7, $xr10 xvfadd.s $xr10, $xr8, $xr11 diff --git a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s index fddbfd32..868aefc5 100644 --- a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s +++ b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s @@ -651,11 +651,10 @@ matgen: # @matgen slli.d $a5, $a5, 5 xvreplve0.w $xr2, $xr0 xvreplve0.w $xr3, $xr1 - lu12i.w $a6, 264192 - xvreplgr2vr.w $xr4, $a6 ori $a6, $zero, 8 - xvrepli.b $xr5, 0 - xvrepli.b $xr6, -1 + xvrepli.b $xr4, 0 + xvrepli.b $xr5, -1 + xvldi $xr6, -1520 move $s7, $fp b .LBB1_41 .p2align 4, , 16 @@ -700,7 +699,7 @@ matgen: # @matgen .LBB1_45: # %vector.body716 # Parent Loop BB1_41 Depth=1 # => This Inner Loop Header: Depth=2 - xvst $xr5, $t1, -16 + 
xvst $xr4, $t1, -16 xvpermi.q $xr12, $xr8, 1 vpickve2gr.w $t3, $vr12, 0 bstrpick.d $t3, $t3, 31, 0 @@ -744,7 +743,7 @@ matgen: # @matgen xvinsgr2vr.w $xr12, $t3, 6 xvpickve2gr.d $t3, $xr15, 3 xvinsgr2vr.w $xr12, $t3, 7 - xvxor.v $xr14, $xr14, $xr6 + xvxor.v $xr14, $xr14, $xr5 xvpickve2gr.d $t3, $xr14, 0 xvinsgr2vr.w $xr16, $t3, 0 xvpickve2gr.d $t3, $xr14, 1 @@ -753,7 +752,7 @@ matgen: # @matgen xvinsgr2vr.w $xr16, $t3, 2 xvpickve2gr.d $t3, $xr14, 3 xvinsgr2vr.w $xr16, $t3, 3 - xvxor.v $xr14, $xr15, $xr6 + xvxor.v $xr14, $xr15, $xr5 xvpickve2gr.d $t3, $xr14, 0 xvinsgr2vr.w $xr16, $t3, 4 xvpickve2gr.d $t3, $xr14, 1 @@ -787,7 +786,7 @@ matgen: # @matgen xvpickve2gr.w $t3, $xr13, 0 xvbitsel.v $xr11, $xr3, $xr2, $xr11 andi $t4, $t3, 1 - xvbitsel.v $xr11, $xr11, $xr4, $xr12 + xvbitsel.v $xr11, $xr11, $xr6, $xr12 beqz $t4, .LBB1_47 # %bb.46: # %pred.store.if # in Loop: Header=BB1_45 Depth=2 diff --git a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaDec.s b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaDec.s index 7779b427..c4f133d9 100644 --- a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaDec.s +++ b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaDec.s @@ -140,8 +140,7 @@ LzmaDec_DecodeToDic: # @LzmaDec_DecodeToDic .LBB2_12: # %.lr.ph232 addi.d $s6, $s0, 112 ori $s8, $zero, 18 - ori $a1, $zero, 1024 - xvreplgr2vr.h $xr0, $a1 + xvldi $xr0, -2812 lu12i.w $a1, 16384 ori $a1, $a1, 1024 st.d $a1, $sp, 16 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaEnc.s b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaEnc.s index ff0ff5ee..dfa1dfbf 100644 --- a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaEnc.s +++ b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/C/LzmaEnc.s @@ -1809,12 +1809,12 @@ LzmaEnc_Init: # @LzmaEnc_Init st.d $t2, $a7, 24 st.d $zero, $a7, 56 st.w $zero, $a7, 64 - ori $a6, $zero, 1024 - xvreplgr2vr.h $xr0, $a6 - ori $a7, $zero, 3196 - xvstx $xr0, $a1, $a7 + xvldi $xr0, -2812 + ori $a6, $zero, 3196 + xvstx $xr0, $a1, $a6 xvst $xr0, $a2, 94 ori $a7, $zero, 3580 + ori $a6, $zero, 1024 stx.h $a6, $a1, $a7 st.h $a6, $a2, 22 st.h $a6, $a2, 46 diff --git a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/Bcj2Coder.s b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/Bcj2Coder.s index b3fadd2d..c12b17ea 100644 --- a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/Bcj2Coder.s +++ b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Compress/Bcj2Coder.s @@ -543,8 +543,7 @@ _ZN9NCompress5NBcj28CEncoder8CodeRealEPP19ISequentialInStreamPPKyjPP20ISequentia ori $a0, $zero, 1 st.w $a0, $fp, 192 st.b $zero, $fp, 196 - ori $a0, $zero, 1024 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3836 xvst $xr0, $fp, 272 xvst $xr0, $fp, 304 xvst $xr0, $fp, 336 @@ -577,6 +576,7 @@ _ZN9NCompress5NBcj28CEncoder8CodeRealEPP19ISequentialInStreamPPKyjPP20ISequentia xvst $xr0, $fp, 1200 xvst $xr0, $fp, 1232 xvst $xr0, $fp, 1264 + ori $a0, $zero, 1024 lu32i.d $a0, 1024 st.d $a0, $fp, 1296 st.d $zero, $sp, 168 @@ -1637,8 +1637,7 @@ _ZN9NCompress5NBcj28CDecoderC2Ev: # @_ZN9NCompress5NBcj28CDecoderC2Ev st.d $zero, $s0, 1296 lu12i.w $a0, 16 st.w $a0, $s0, 1328 - lu12i.w $a0, 256 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3568 vst $vr0, $s0, 1312 ld.d $s2, $sp, 8 # 8-byte Folded Reload 
ld.d $s1, $sp, 16 # 8-byte Folded Reload @@ -1949,8 +1948,7 @@ _ZN9NCompress5NBcj28CDecoder8CodeRealEPP19ISequentialInStreamPPKyjPP20ISequentia move $s3, $zero addi.d $a0, $s0, 224 st.d $a0, $sp, 32 # 8-byte Folded Spill - ori $a0, $zero, 1024 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3836 xvst $xr0, $s0, 224 xvst $xr0, $s0, 256 xvst $xr0, $s0, 288 @@ -1983,6 +1981,7 @@ _ZN9NCompress5NBcj28CDecoder8CodeRealEPP19ISequentialInStreamPPKyjPP20ISequentia xvst $xr0, $s0, 1152 xvst $xr0, $s0, 1184 xvst $xr0, $s0, 1216 + ori $a0, $zero, 1024 lu32i.d $a0, 1024 st.d $a0, $s0, 1248 b .LBB14_21 diff --git a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/Bench.s b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/Bench.s index bda614cf..6daf5529 100644 --- a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/Bench.s +++ b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/Bench.s @@ -508,8 +508,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr6, $a7, %pc_lo12(.LCPI7_3) xvreplgr2vr.w $xr0, $a0 addi.w $a0, $zero, -32 - ori $t1, $zero, 512 - xvreplgr2vr.w $xr7, $t1 + xvldi $xr7, -3838 xvrepli.w $xr1, 32 ori $t1, $zero, 192 .p2align 4, , 16 @@ -679,8 +678,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - ori $t0, $zero, 1024 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3836 ori $t0, $zero, 192 .p2align 4, , 16 .LBB7_8: # %vector.body224 @@ -849,8 +847,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - ori $t0, $zero, 2048 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3832 ori $t0, $zero, 192 .p2align 4, , 16 .LBB7_13: # %vector.body236 @@ -1019,8 +1016,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 1 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3824 ori $t0, $zero, 192 .p2align 4, , 16 .LBB7_18: # %vector.body248 @@ -1189,8 +1185,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 2 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3808 ori $t0, $zero, 192 .p2align 4, , 16 .LBB7_23: # %vector.body260 @@ -1359,8 +1354,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 4 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3776 ori $t0, $zero, 192 .LBB7_28: # %vector.body272 # =>This Inner Loop Header: Depth=1 @@ -1528,8 +1522,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 8 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3712 ori $t0, $zero, 192 .LBB7_33: # %vector.body284 # =>This Inner Loop Header: Depth=1 @@ -1697,8 +1690,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 16 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3583 ori $t0, $zero, 192 .LBB7_38: # %vector.body296 # =>This Inner Loop Header: Depth=1 @@ -1866,8 +1858,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, 
%pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 32 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3582 ori $t0, $zero, 192 .LBB7_43: # %vector.body308 # =>This Inner Loop Header: Depth=1 @@ -2035,8 +2026,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 64 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3580 ori $t0, $zero, 192 .LBB7_48: # %vector.body320 # =>This Inner Loop Header: Depth=1 @@ -2204,8 +2194,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 128 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3576 ori $t0, $zero, 192 .LBB7_53: # %vector.body332 # =>This Inner Loop Header: Depth=1 @@ -2371,8 +2360,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 256 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3568 ori $t0, $zero, 192 .LBB7_57: # %vector.body344 # =>This Inner Loop Header: Depth=1 @@ -2538,8 +2526,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 512 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3552 ori $t0, $zero, 192 .LBB7_61: # %vector.body356 # =>This Inner Loop Header: Depth=1 @@ -2705,8 +2692,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 1024 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3520 ori $t0, $zero, 192 .LBB7_65: # %vector.body368 # =>This Inner Loop Header: Depth=1 @@ -2872,8 +2858,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 2048 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3456 ori $t0, $zero, 192 .LBB7_69: # %vector.body380 # =>This Inner Loop Header: Depth=1 @@ -3039,8 +3024,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 4096 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3327 ori $t0, $zero, 192 .LBB7_73: # %vector.body392 # =>This Inner Loop Header: Depth=1 @@ -3206,8 +3190,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 8192 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3326 ori $t0, $zero, 192 .LBB7_77: # %vector.body404 # =>This Inner Loop Header: Depth=1 @@ -3373,8 +3356,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 16384 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3324 ori $t0, $zero, 192 .LBB7_81: # %vector.body416 # =>This Inner Loop Header: Depth=1 @@ -3540,8 +3522,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 32768 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3320 ori $t0, $zero, 192 .LBB7_85: # %vector.body428 # =>This Inner Loop Header: Depth=1 @@ -3707,8 +3688,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld 
$xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 65536 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3312 ori $t0, $zero, 192 .LBB7_89: # %vector.body440 # =>This Inner Loop Header: Depth=1 @@ -3874,8 +3854,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 131072 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3296 ori $t0, $zero, 192 .LBB7_93: # %vector.body452 # =>This Inner Loop Header: Depth=1 @@ -4041,8 +4020,7 @@ _Z17GetCompressRatingjyyy: # @_Z17GetCompressRatingjyyy xvld $xr5, $a6, %pc_lo12(.LCPI7_2) xvld $xr6, $a7, %pc_lo12(.LCPI7_3) addi.w $a0, $zero, -32 - lu12i.w $t0, 262144 - xvreplgr2vr.w $xr7, $t0 + xvldi $xr7, -3264 ori $t0, $zero, 192 .LBB7_97: # %vector.body464 # =>This Inner Loop Header: Depth=1 @@ -4828,17 +4806,15 @@ _ZN21CBenchRandomGenerator8GenerateEv: # @_ZN21CBenchRandomGenerator8GenerateEv # %bb.1: # %.lr.ph move $a7, $zero move $a1, $zero - ori $a3, $zero, 1 - lu12i.w $a2, 15 - ori $a2, $a2, 4095 - vreplgr2vr.w $vr0, $a2 + ori $a5, $zero, 1 + vldi $vr0, -2305 lu12i.w $a2, 9 ori $a2, $a2, 105 lu32i.d $a2, 18000 vreplgr2vr.d $vr1, $a2 lu12i.w $a2, 32 - ori $a4, $zero, 1024 - addi.d $a5, $zero, -2 + ori $a3, $zero, 1024 + addi.d $a4, $zero, -2 ori $a6, $zero, 24 b .LBB10_4 .p2align 4, , 16 @@ -4868,7 +4844,7 @@ _ZN21CBenchRandomGenerator8GenerateEv: # @_ZN21CBenchRandomGenerator8GenerateEv vstelm.w $vr2, $t0, 4, 1 add.w $t3, $t2, $t3 bstrpick.d $t2, $t3, 31, 2 - bltu $a1, $a4, .LBB10_2 + bltu $a1, $a3, .LBB10_2 # %bb.5: # in Loop: Header=BB10_4 Depth=1 and $t1, $t1, $a2 beqz $t1, .LBB10_2 @@ -4876,7 +4852,7 @@ _ZN21CBenchRandomGenerator8GenerateEv: # @_ZN21CBenchRandomGenerator8GenerateEv bstrpick.d $a7, $t3, 31, 4 andi $t1, $t2, 3 addi.d $t2, $t1, 1 - sll.w $t1, $a5, $t1 + sll.w $t1, $a4, $t1 andn $t3, $a7, $t1 srl.w $t1, $a7, $t2 andi $t2, $t1, 7 @@ -4891,15 +4867,15 @@ _ZN21CBenchRandomGenerator8GenerateEv: # @_ZN21CBenchRandomGenerator8GenerateEv b .LBB10_9 .p2align 4, , 16 .LBB10_8: # in Loop: Header=BB10_9 Depth=2 - addi.d $a3, $zero, -64 - sll.w $a3, $a3, $t4 + addi.d $a5, $zero, -64 + sll.w $a5, $a5, $t4 vand.v $vr4, $vr2, $vr0 vsrli.w $vr3, $vr2, 16 vmadd.w $vr3, $vr4, $vr1 - andn $a3, $t3, $a3 + andn $a5, $t3, $a5 vori.b $vr2, $vr3, 0 vpickve2gr.w $t3, $vr3, 1 - bltu $a3, $a1, .LBB10_11 + bltu $a5, $a1, .LBB10_11 .LBB10_9: # Parent Loop BB10_4 Depth=1 # => This Inner Loop Header: Depth=2 andi $t4, $t3, 31 @@ -4915,17 +4891,17 @@ _ZN21CBenchRandomGenerator8GenerateEv: # @_ZN21CBenchRandomGenerator8GenerateEv vori.b $vr3, $vr2, 0 vinsgr2vr.w $vr3, $t3, 1 vpickve2gr.w $t3, $vr3, 1 - bgeu $a3, $a1, .LBB10_9 + bgeu $a5, $a1, .LBB10_9 .LBB10_11: # in Loop: Header=BB10_4 Depth=1 - sll.w $t1, $a5, $t1 + sll.w $t1, $a4, $t1 andn $t1, $t2, $t1 add.d $a7, $t1, $a7 vstelm.w $vr3, $t0, 0, 0 vstelm.w $vr2, $t0, 4, 1 - addi.w $a3, $a3, 1 + addi.w $a5, $a5, 1 .LBB10_12: # in Loop: Header=BB10_4 Depth=1 add.w $t0, $a7, $a1 - sub.w $t1, $zero, $a3 + sub.w $t1, $zero, $a5 .p2align 4, , 16 .LBB10_13: # Parent Loop BB10_4 Depth=1 # => This Inner Loop Header: Depth=2 diff --git a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/OpenArchive.s b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/OpenArchive.s index b900a342..9ce90e52 100644 --- a/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/OpenArchive.s +++ 
b/results/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/UI/Common/OpenArchive.s @@ -2463,13 +2463,13 @@ _ZN4CArc10OpenStreamEP7CCodecsiP9IInStreamP19ISequentialInStreamP20IArchiveOpenC xvreplgr2vr.w $xr2, $s5 pcalau12i $a4, %pc_hi20(.LCPI6_0) xvld $xr3, $a4, %pc_lo12(.LCPI6_0) - xvreplgr2vr.w $xr5, $s6 - xvreplgr2vr.w $xr4, $a3 + xvreplgr2vr.w $xr4, $s6 + xvldi $xr5, -3200 addi.d $a4, $a1, 32 move $a5, $a2 - xvori.b $xr6, $xr4, 0 - xvori.b $xr0, $xr4, 0 - xvori.b $xr1, $xr4, 0 + xvori.b $xr6, $xr5, 0 + xvori.b $xr0, $xr5, 0 + xvori.b $xr1, $xr5, 0 .p2align 4, , 16 .LBB6_207: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -2480,16 +2480,16 @@ _ZN4CArc10OpenStreamEP7CCodecsiP9IInStreamP19ISequentialInStreamP20IArchiveOpenC xvseq.w $xr11, $xr8, $xr2 xvbitsel.v $xr0, $xr0, $xr3, $xr10 xvbitsel.v $xr1, $xr1, $xr9, $xr11 - xvseq.w $xr7, $xr7, $xr5 - xvseq.w $xr8, $xr8, $xr5 - xvbitsel.v $xr4, $xr4, $xr3, $xr7 + xvseq.w $xr7, $xr7, $xr4 + xvseq.w $xr8, $xr8, $xr4 + xvbitsel.v $xr5, $xr5, $xr3, $xr7 xvbitsel.v $xr6, $xr6, $xr9, $xr8 xvaddi.wu $xr3, $xr3, 16 addi.d $a5, $a5, -16 addi.d $a4, $a4, 64 bnez $a5, .LBB6_207 # %bb.208: # %middle.block - xvmax.w $xr2, $xr4, $xr6 + xvmax.w $xr2, $xr5, $xr6 xvpermi.q $xr3, $xr2, 1 vmax.w $vr2, $vr2, $vr3 vbsrl.v $vr3, $vr2, 8 diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/cyclic_reduction.s b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/cyclic_reduction.s index c3ebccdb..d0da445e 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/cyclic_reduction.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/cyclic_reduction.s @@ -1091,8 +1091,7 @@ hypre_CycRedSetupCoarseOp: # @hypre_CycRedSetupCoarseOp ori $s7, $zero, 1 ori $s8, $zero, 4 xvrepli.b $xr3, 0 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr4, $a0 + xvldi $xr4, -1024 xvst $xr3, $sp, 272 # 32-byte Folded Spill xvst $xr4, $sp, 240 # 32-byte Folded Spill b .LBB2_67 diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s index 219a74ef..4961a183 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s @@ -1445,8 +1445,7 @@ main: # @main bstrpick.d $a0, $s5, 30, 3 slli.d $a0, $a0, 3 addi.d $a1, $fp, 32 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 move $a2, $a0 .p2align 4, , 16 .LBB0_154: # %vector.body970 diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2_setup_rap.s b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2_setup_rap.s index 0df37155..e25217fe 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2_setup_rap.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2_setup_rap.s @@ -4597,8 +4597,7 @@ hypre_SMG2RAPPeriodicSym: # @hypre_SMG2RAPPeriodicSym lu32i.d $a0, -1 st.d $a0, $sp, 104 # 8-byte Folded Spill xvrepli.b $xr6, 0 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1024 xvst $xr0, $sp, 16 # 32-byte Folded Spill st.d $s0, $sp, 48 # 8-byte Folded Spill st.d $fp, $sp, 128 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg3_setup_rap.s 
b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg3_setup_rap.s index 798536b8..3d4a49f2 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg3_setup_rap.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg3_setup_rap.s @@ -5828,8 +5828,7 @@ hypre_SMG3RAPPeriodicSym: # @hypre_SMG3RAPPeriodicSym st.d $a2, $sp, 120 # 8-byte Folded Spill ori $s7, $zero, 16 xvrepli.b $xr6, 0 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr7, $a0 + xvldi $xr7, -1024 # implicit-def: $r4 # kill: killed $r4 # implicit-def: $r4 diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/struct_matrix.s b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/struct_matrix.s index 24dae140..831187e6 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/struct_matrix.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/struct_matrix.s @@ -553,7 +553,7 @@ hypre_StructMatrixInitializeData: # @hypre_StructMatrixInitializeData addi.w $s3, $zero, -1 ori $s4, $zero, 8 lu52i.d $s5, $zero, 1023 - xvreplgr2vr.d $xr0, $s5 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill b .LBB5_3 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/laplace.s b/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/laplace.s index 934f71bb..cdddb033 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/laplace.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/laplace.s @@ -573,8 +573,7 @@ GenerateSeqLaplacian: # @GenerateSeqLaplacian addi.d $a2, $a0, 32 addi.d $a3, $s1, 32 xvrepli.b $xr0, 0 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr1, $a4 + xvldi $xr1, -912 move $a4, $a5 .p2align 4, , 16 .LBB0_72: # %vector.body diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s index d4ba774f..3811ffbb 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s @@ -49,17 +49,13 @@ Crystal_div: # @Crystal_div slli.d $t1, $t0, 2 addi.d $t2, $sp, 8 addi.d $t3, $sp, 104 - lu52i.d $t4, $zero, 1023 - xvreplgr2vr.d $xr2, $t4 + xvldi $xr2, -912 lu12i.w $t4, -419431 ori $t4, $t4, 2458 lu32i.d $t4, -419431 lu52i.d $t4, $t4, 1020 xvreplgr2vr.d $xr3, $t4 - ori $t4, $zero, 0 - lu32i.d $t4, -524288 - lu52i.d $t4, $t4, 1026 - xvreplgr2vr.d $xr4, $t4 + xvldi $xr4, -984 lu12i.w $t4, -209716 ori $t4, $t4, 3277 lu32i.d $t4, -209716 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s index c42c00ca..b842c26c 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s @@ -49,8 +49,7 @@ Crystal_pow: # @Crystal_pow slli.d $a0, $a0, 2 addi.d $a1, $sp, 112 addi.d $a2, $sp, 16 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -912 lu12i.w $a3, -419431 ori $a3, $a3, 2458 lu32i.d $a3, -419431 @@ -61,10 +60,7 @@ Crystal_pow: # @Crystal_pow lu32i.d $a3, -209716 lu52i.d 
$a3, $a3, 1022 xvreplgr2vr.d $xr3, $a3 - ori $a3, $zero, 0 - lu32i.d $a3, -524288 - lu52i.d $a3, $a3, 1026 - xvreplgr2vr.d $xr4, $a3 + xvldi $xr4, -984 move $a3, $a0 .p2align 4, , 16 .LBB0_4: # %vector.body diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s index d8c7d298..710f44e6 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s @@ -3807,8 +3807,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s index 4069ee6c..c50a1c63 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s @@ -6234,8 +6234,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s index 702bf8f5..bc7b3c1f 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s @@ -1154,8 +1154,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s index efef48a9..46169796 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s @@ -2387,8 +2387,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s index 90a35fe1..31dceddc 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s @@ -10641,8 +10641,7 @@ 
_ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s index ed2ec396..15af8523 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s @@ -14,11 +14,10 @@ _ZN9btBvhTree20_calc_splitting_axisER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree20_ alsl.d $a0, $a2, $a0, 2 add.d $a6, $a0, $a6 addi.d $a6, $a6, 20 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero - vldi $vr1, -1184 - lu12i.w $a7, 258048 - vreplgr2vr.w $vr0, $a7 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero + vldi $vr0, -1184 + vldi $vr3, -3265 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 fld.s $fa4, $a6, -4 @@ -26,30 +25,31 @@ _ZN9btBvhTree20_calc_splitting_axisER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree20_ ld.d $a7, $a6, 0 ld.d $t0, $a6, -16 fadd.s $fa4, $fa4, $fa5 - fmul.s $fa4, $fa4, $fa1 + fmul.s $fa4, $fa4, $fa0 vinsgr2vr.d $vr5, $a7, 0 vinsgr2vr.d $vr6, $t0, 0 vfadd.s $vr5, $vr5, $vr6 - vfmul.s $vr5, $vr5, $vr0 - fadd.s $fa3, $fa3, $fa4 - vfadd.s $vr2, $vr2, $vr5 + vfmul.s $vr5, $vr5, $vr3 + fadd.s $fa2, $fa2, $fa4 + vfadd.s $vr1, $vr1, $vr5 addi.d $a5, $a5, -1 addi.d $a6, $a6, 36 bnez $a5, .LBB0_2 # %bb.3: # %.lr.ph134 - movgr2fr.w $fa1, $a4 - ffint.s.w $fa1, $fa1 - frecip.s $fa5, $fa1 + movgr2fr.w $fa0, $a4 + ffint.s.w $fa0, $fa0 + frecip.s $fa4, $fa0 ld.d $a1, $a1, 16 - fmul.s $fa4, $fa5, $fa3 - vextrins.w $vr5, $vr5, 16 - vfmul.s $vr5, $vr5, $vr2 + fmul.s $fa3, $fa4, $fa2 + vextrins.w $vr4, $vr4, 16 + vfmul.s $vr4, $vr4, $vr1 add.d $a0, $a0, $a1 addi.d $a0, $a0, 20 sub.d $a1, $a3, $a2 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero - vldi $vr6, -1184 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero + vldi $vr5, -1184 + vldi $vr6, -3265 .p2align 4, , 16 .LBB0_4: # =>This Inner Loop Header: Depth=1 fld.s $fa7, $a0, -4 @@ -58,36 +58,36 @@ _ZN9btBvhTree20_calc_splitting_axisER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree20_ fadd.s $fa7, $fa7, $ft0 vinsgr2vr.d $vr8, $a2, 0 ld.d $a2, $a0, -16 - fmul.s $fa7, $fa7, $fa6 - fsub.s $fa7, $fa7, $fa4 + fmul.s $fa7, $fa7, $fa5 + fsub.s $fa7, $fa7, $fa3 fmul.s $fa7, $fa7, $fa7 vinsgr2vr.d $vr9, $a2, 0 vfadd.s $vr8, $vr8, $vr9 - vfmul.s $vr8, $vr8, $vr0 - vfsub.s $vr8, $vr8, $vr5 + vfmul.s $vr8, $vr8, $vr6 + vfsub.s $vr8, $vr8, $vr4 vfmul.s $vr8, $vr8, $vr8 - fadd.s $fa3, $fa3, $fa7 - vfadd.s $vr2, $vr2, $vr8 + fadd.s $fa2, $fa2, $fa7 + vfadd.s $vr1, $vr1, $vr8 addi.d $a1, $a1, -1 addi.d $a0, $a0, 36 bnez $a1, .LBB0_4 b .LBB0_6 .LBB0_5: # %._crit_edge movgr2fr.w $fa0, $a4 - ffint.s.w $fa1, $fa0 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero + ffint.s.w $fa0, $fa0 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero .LBB0_6: # %._crit_edge135 - vldi $vr0, -1040 - fadd.s $fa0, $fa1, $fa0 + vldi $vr3, -1040 + fadd.s $fa0, $fa0, $fa3 frecip.s $fa0, $fa0 - fmul.s $fa1, $fa0, $fa3 - vreplvei.w $vr3, $vr2, 0 + fmul.s $fa2, $fa0, $fa2 + vreplvei.w $vr3, $vr1, 0 fmul.s $fa3, $fa0, $fa3 - vreplvei.w $vr2, $vr2, 1 - fmul.s $fa0, $fa0, $fa2 - fcmp.clt.s $fcc0, $fa1, $fa3 - fsel $fa1, $fa1, $fa3, $fcc0 + vreplvei.w $vr1, $vr1, 1 + fmul.s $fa0, $fa0, $fa1 + fcmp.clt.s $fcc0, $fa2, $fa3 + fsel $fa1, $fa2, $fa3, 
$fcc0 fcmp.clt.s $fcc1, $fa1, $fa0 movcf2gr $a0, $fcc0 movcf2gr $a1, $fcc1 @@ -119,8 +119,7 @@ _ZN9btBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii: # @_ZN9b vrepli.b $vr0, 0 movgr2fr.w $fa1, $zero vldi $vr2, -1184 - lu12i.w $t1, 258048 - vreplgr2vr.w $vr3, $t1 + vldi $vr3, -3265 .p2align 4, , 16 .LBB1_2: # =>This Inner Loop Header: Depth=1 fld.s $fa4, $a6, -4 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s index 2ac66486..a1289c87 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s @@ -112,11 +112,10 @@ _ZN18btQuantizedBvhTree20_calc_splitting_axisER18GIM_BVH_DATA_ARRAYii: # @_ZN18b alsl.d $a0, $a2, $a0, 2 add.d $a6, $a0, $a6 addi.d $a6, $a6, 20 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero - vldi $vr1, -1184 - lu12i.w $a7, 258048 - vreplgr2vr.w $vr0, $a7 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero + vldi $vr0, -1184 + vldi $vr3, -3265 .p2align 4, , 16 .LBB1_2: # =>This Inner Loop Header: Depth=1 fld.s $fa4, $a6, -4 @@ -124,30 +123,31 @@ _ZN18btQuantizedBvhTree20_calc_splitting_axisER18GIM_BVH_DATA_ARRAYii: # @_ZN18b ld.d $a7, $a6, 0 ld.d $t0, $a6, -16 fadd.s $fa4, $fa4, $fa5 - fmul.s $fa4, $fa4, $fa1 + fmul.s $fa4, $fa4, $fa0 vinsgr2vr.d $vr5, $a7, 0 vinsgr2vr.d $vr6, $t0, 0 vfadd.s $vr5, $vr5, $vr6 - vfmul.s $vr5, $vr5, $vr0 - fadd.s $fa3, $fa3, $fa4 - vfadd.s $vr2, $vr2, $vr5 + vfmul.s $vr5, $vr5, $vr3 + fadd.s $fa2, $fa2, $fa4 + vfadd.s $vr1, $vr1, $vr5 addi.d $a5, $a5, -1 addi.d $a6, $a6, 36 bnez $a5, .LBB1_2 # %bb.3: # %.lr.ph134 - movgr2fr.w $fa1, $a4 - ffint.s.w $fa1, $fa1 - frecip.s $fa5, $fa1 + movgr2fr.w $fa0, $a4 + ffint.s.w $fa0, $fa0 + frecip.s $fa4, $fa0 ld.d $a1, $a1, 16 - fmul.s $fa4, $fa5, $fa3 - vextrins.w $vr5, $vr5, 16 - vfmul.s $vr5, $vr5, $vr2 + fmul.s $fa3, $fa4, $fa2 + vextrins.w $vr4, $vr4, 16 + vfmul.s $vr4, $vr4, $vr1 add.d $a0, $a0, $a1 addi.d $a0, $a0, 20 sub.d $a1, $a3, $a2 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero - vldi $vr6, -1184 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero + vldi $vr5, -1184 + vldi $vr6, -3265 .p2align 4, , 16 .LBB1_4: # =>This Inner Loop Header: Depth=1 fld.s $fa7, $a0, -4 @@ -156,36 +156,36 @@ _ZN18btQuantizedBvhTree20_calc_splitting_axisER18GIM_BVH_DATA_ARRAYii: # @_ZN18b fadd.s $fa7, $fa7, $ft0 vinsgr2vr.d $vr8, $a2, 0 ld.d $a2, $a0, -16 - fmul.s $fa7, $fa7, $fa6 - fsub.s $fa7, $fa7, $fa4 + fmul.s $fa7, $fa7, $fa5 + fsub.s $fa7, $fa7, $fa3 fmul.s $fa7, $fa7, $fa7 vinsgr2vr.d $vr9, $a2, 0 vfadd.s $vr8, $vr8, $vr9 - vfmul.s $vr8, $vr8, $vr0 - vfsub.s $vr8, $vr8, $vr5 + vfmul.s $vr8, $vr8, $vr6 + vfsub.s $vr8, $vr8, $vr4 vfmul.s $vr8, $vr8, $vr8 - fadd.s $fa3, $fa3, $fa7 - vfadd.s $vr2, $vr2, $vr8 + fadd.s $fa2, $fa2, $fa7 + vfadd.s $vr1, $vr1, $vr8 addi.d $a1, $a1, -1 addi.d $a0, $a0, 36 bnez $a1, .LBB1_4 b .LBB1_6 .LBB1_5: # %._crit_edge movgr2fr.w $fa0, $a4 - ffint.s.w $fa1, $fa0 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero + ffint.s.w $fa0, $fa0 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero .LBB1_6: # %._crit_edge135 - vldi $vr0, -1040 - fadd.s $fa0, $fa1, $fa0 + vldi $vr3, -1040 + fadd.s $fa0, $fa0, $fa3 frecip.s $fa0, $fa0 - fmul.s $fa1, $fa0, $fa3 - vreplvei.w $vr3, $vr2, 0 + fmul.s $fa2, $fa0, $fa2 + vreplvei.w $vr3, $vr1, 0 fmul.s $fa3, $fa0, $fa3 - vreplvei.w $vr2, $vr2, 1 - fmul.s $fa0, $fa0, $fa2 - fcmp.clt.s $fcc0, $fa1, $fa3 - fsel $fa1, $fa1, 
$fa3, $fcc0 + vreplvei.w $vr1, $vr1, 1 + fmul.s $fa0, $fa0, $fa1 + fcmp.clt.s $fcc0, $fa2, $fa3 + fsel $fa1, $fa2, $fa3, $fcc0 fcmp.clt.s $fcc1, $fa1, $fa0 movcf2gr $a0, $fcc0 movcf2gr $a1, $fcc1 @@ -217,8 +217,7 @@ _ZN18btQuantizedBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii vrepli.b $vr0, 0 movgr2fr.w $fa1, $zero vldi $vr2, -1184 - lu12i.w $t1, 258048 - vreplgr2vr.w $vr3, $t1 + vldi $vr3, -3265 .p2align 4, , 16 .LBB2_2: # =>This Inner Loop Header: Depth=1 fld.s $fa4, $a6, -4 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofSpringConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofSpringConstraint.s index 4ef34799..abc2dbfe 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofSpringConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofSpringConstraint.s @@ -23,9 +23,9 @@ _ZN29btGeneric6DofSpringConstraintC2ER11btRigidBodyS1_RK11btTransformS4_b: # @_Z xvrepli.b $xr0, 0 xvst $xr0, $fp, 1282 st.d $zero, $fp, 1312 - lu12i.w $a0, 260096 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -1424 vst $vr0, $fp, 1320 + lu12i.w $a0, 260096 lu52i.d $a0, $a0, 1016 st.d $a0, $fp, 1336 ld.d $fp, $sp, 0 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s index f870f072..5702bea0 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s @@ -1935,8 +1935,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s index 84ea91c2..9afe2622 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s @@ -4037,8 +4037,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s index 6ed4d1df..59a4fb8a 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s @@ -1460,8 +1460,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s 
index b0952802..227ff697 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s @@ -6844,8 +6844,7 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio fsub.s $fa2, $fa2, $fa5 vextrins.w $vr4, $vr0, 16 vshuf4i.w $vr0, $vr4, 64 - lu12i.w $a0, 258048 - vreplgr2vr.w $vr4, $a0 + vldi $vr4, -3265 vextrins.w $vr4, $vr1, 0 vextrins.w $vr4, $vr3, 16 vextrins.w $vr4, $vr2, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s index eeced963..e9a48c4d 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s @@ -156,8 +156,7 @@ _ZN10btSoftBodyC2EP19btSoftBodyWorldInfoiPK9btVector3PKf: # @_ZN10btSoftBodyC2EP vst $vr1, $fp, 320 st.w $zero, $fp, 336 xvst $xr0, $fp, 340 - lu12i.w $a1, 258048 - vreplgr2vr.w $vr0, $a1 + vldi $vr0, -3265 vst $vr0, $fp, 372 pcalau12i $a1, %pc_hi20(.LCPI0_1) vld $vr0, $a1, %pc_lo12(.LCPI0_1) diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s index c80ea5c6..7c181ee8 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s @@ -1615,8 +1615,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel b .LBB0_143 .LBB0_100: ld.d $a1, $s0, 1144 - lu12i.w $a0, 260096 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1168 vst $vr0, $sp, 224 vld $vr0, $sp, 80 # 16-byte Folded Reload vst $vr0, $sp, 160 @@ -3194,11 +3193,10 @@ _ZN17btSoftBodyHelpers12DrawNodeTreeEP10btSoftBodyP12btIDebugDrawii: # @_ZN17btS move $a6, $a3 move $a5, $a2 move $a2, $a1 + pcalau12i $a1, %pc_hi20(.LCPI6_0) + vld $vr0, $a1, %pc_lo12(.LCPI6_0) ld.d $a1, $a0, 1144 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - vld $vr0, $a0, %pc_lo12(.LCPI6_0) - lu12i.w $a0, 260096 - vreplgr2vr.d $vr1, $a0 + vldi $vr1, -1168 vst $vr1, $sp, 16 vst $vr0, $sp, 0 addi.d $a3, $sp, 16 @@ -3769,27 +3767,26 @@ _ZL8drawTreeP12btIDebugDrawPK10btDbvtNodeiRK9btVector3S6_ii: # @_ZL8drawTreeP12b _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw .cfi_startproc # %bb.0: - addi.d $sp, $sp, -288 - .cfi_def_cfa_offset 288 - st.d $ra, $sp, 280 # 8-byte Folded Spill - st.d $fp, $sp, 272 # 8-byte Folded Spill - st.d $s0, $sp, 264 # 8-byte Folded Spill - st.d $s1, $sp, 256 # 8-byte Folded Spill - st.d $s2, $sp, 248 # 8-byte Folded Spill - st.d $s3, $sp, 240 # 8-byte Folded Spill - st.d $s4, $sp, 232 # 8-byte Folded Spill - st.d $s5, $sp, 224 # 8-byte Folded Spill - st.d $s6, $sp, 216 # 8-byte Folded Spill - st.d $s7, $sp, 208 # 8-byte Folded Spill - st.d $s8, $sp, 200 # 8-byte Folded Spill - fst.d $fs0, $sp, 192 # 8-byte Folded Spill - fst.d $fs1, $sp, 184 # 8-byte Folded Spill - fst.d $fs2, $sp, 176 # 8-byte Folded Spill - fst.d $fs3, $sp, 168 # 8-byte Folded Spill - fst.d $fs4, $sp, 160 # 8-byte Folded Spill - fst.d $fs5, $sp, 152 # 8-byte Folded Spill - fst.d $fs6, $sp, 144 # 8-byte Folded Spill - fst.d $fs7, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -272 + .cfi_def_cfa_offset 272 + st.d $ra, $sp, 264 # 8-byte 
Folded Spill + st.d $fp, $sp, 256 # 8-byte Folded Spill + st.d $s0, $sp, 248 # 8-byte Folded Spill + st.d $s1, $sp, 240 # 8-byte Folded Spill + st.d $s2, $sp, 232 # 8-byte Folded Spill + st.d $s3, $sp, 224 # 8-byte Folded Spill + st.d $s4, $sp, 216 # 8-byte Folded Spill + st.d $s5, $sp, 208 # 8-byte Folded Spill + st.d $s6, $sp, 200 # 8-byte Folded Spill + st.d $s7, $sp, 192 # 8-byte Folded Spill + fst.d $fs0, $sp, 184 # 8-byte Folded Spill + fst.d $fs1, $sp, 176 # 8-byte Folded Spill + fst.d $fs2, $sp, 168 # 8-byte Folded Spill + fst.d $fs3, $sp, 160 # 8-byte Folded Spill + fst.d $fs4, $sp, 152 # 8-byte Folded Spill + fst.d $fs5, $sp, 144 # 8-byte Folded Spill + fst.d $fs6, $sp, 136 # 8-byte Folded Spill + fst.d $fs7, $sp, 128 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -3800,15 +3797,14 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 - .cfi_offset 61, -136 - .cfi_offset 62, -144 - .cfi_offset 63, -152 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 + .cfi_offset 61, -128 + .cfi_offset 62, -136 + .cfi_offset 63, -144 move $fp, $a0 ld.bu $a0, $a0, 537 beqz $a0, .LBB11_4 @@ -3925,17 +3921,17 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod movfr2gr.s $a0, $fa0 movfr2gr.s $a1, $fa2 bstrins.d $a0, $a1, 63, 32 - st.d $a0, $sp, 120 + st.d $a0, $sp, 112 ld.d $a0, $s0, 0 pcalau12i $a1, %pc_hi20(.LCPI11_0) vld $vr0, $a1, %pc_lo12(.LCPI11_0) movfr2gr.s $a1, $fa1 ld.d $a4, $a0, 40 bstrpick.d $a0, $a1, 31, 0 - st.d $a0, $sp, 128 + st.d $a0, $sp, 120 vst $vr0, $sp, 96 addi.d $a1, $sp, 80 - addi.d $a2, $sp, 120 + addi.d $a2, $sp, 112 addi.d $a3, $sp, 96 move $a0, $s0 jirl $ra, $a4, 0 @@ -3953,17 +3949,17 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod movfr2gr.s $a0, $fa0 movfr2gr.s $a1, $fa2 bstrins.d $a0, $a1, 63, 32 - st.d $a0, $sp, 120 + st.d $a0, $sp, 112 ld.d $a0, $s0, 0 pcalau12i $a1, %pc_hi20(.LCPI11_1) vld $vr0, $a1, %pc_lo12(.LCPI11_1) movfr2gr.s $a1, $fa1 ld.d $a4, $a0, 40 bstrpick.d $a0, $a1, 31, 0 - st.d $a0, $sp, 128 + st.d $a0, $sp, 120 vst $vr0, $sp, 96 addi.d $a1, $sp, 80 - addi.d $a2, $sp, 120 + addi.d $a2, $sp, 112 addi.d $a3, $sp, 96 move $a0, $s0 jirl $ra, $a4, 0 @@ -3984,17 +3980,17 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod movfr2gr.s $a0, $fa0 movfr2gr.s $a1, $fa1 bstrins.d $a0, $a1, 63, 32 - st.d $a0, $sp, 120 + st.d $a0, $sp, 112 ld.d $a0, $s0, 0 pcalau12i $a1, %pc_hi20(.LCPI11_2) vld $vr0, $a1, %pc_lo12(.LCPI11_2) movfr2gr.s $a1, $fa2 ld.d $a4, $a0, 40 bstrpick.d $a0, $a1, 31, 0 - st.d $a0, $sp, 128 + st.d $a0, $sp, 120 vst $vr0, $sp, 96 addi.d $a1, $sp, 80 - addi.d $a2, $sp, 120 + addi.d $a2, $sp, 112 addi.d $a3, $sp, 96 move $a0, $s0 jirl $ra, $a4, 0 @@ -4009,7 +4005,6 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fst.s $fa0, $sp, 28 # 4-byte Folded Spill move $s1, $zero move $s2, $zero - lu12i.w $s3, 260096 fst.s $fs2, $sp, 40 # 4-byte Folded Spill fst.s $fs4, $sp, 32 # 4-byte Folded Spill fst.s $fs6, $sp, 24 # 4-byte Folded Spill @@ -4041,18 +4036,18 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fmov.s $fs6, $fs5 fadd.s $fs5, $fa5, $fa4 fadd.s $fs3, $fa0, 
$fa6 - movfr2gr.s $s4, $fs7 - movfr2gr.s $s5, $fs5 + movfr2gr.s $s3, $fs7 + movfr2gr.s $s4, $fs5 movfr2gr.s $a0, $fs3 - bstrpick.d $s7, $a0, 31, 0 - vreplgr2vr.d $vr0, $s3 + bstrpick.d $s6, $a0, 31, 0 + vldi $vr0, -1168 vst $vr0, $sp, 64 fld.s $fs0, $sp, 36 # 4-byte Folded Reload fadd.s $fa0, $fs7, $fs0 movfr2gr.s $a0, $fa0 - bstrins.d $a0, $s5, 63, 32 - st.d $a0, $sp, 120 - st.d $s7, $sp, 128 + bstrins.d $a0, $s4, 63, 32 + st.d $a0, $sp, 112 + st.d $s6, $sp, 120 fld.s $fs1, $sp, 28 # 4-byte Folded Reload fadd.s $fa0, $fs7, $fs1 fld.s $fs4, $sp, 44 # 4-byte Folded Reload @@ -4060,24 +4055,24 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fadd.s $fa2, $fs3, $fs4 movfr2gr.s $a0, $fa0 ld.d $a1, $s0, 0 - movfr2gr.s $s6, $fa1 - bstrins.d $a0, $s6, 63, 32 + movfr2gr.s $s5, $fa1 + bstrins.d $a0, $s5, 63, 32 st.d $a0, $sp, 96 ld.d $a4, $a1, 40 movfr2gr.s $a0, $fa2 - bstrpick.d $s8, $a0, 31, 0 - st.d $s8, $sp, 104 - addi.d $a1, $sp, 120 + bstrpick.d $s7, $a0, 31, 0 + st.d $s7, $sp, 104 + addi.d $a1, $sp, 112 addi.d $a2, $sp, 96 addi.d $a3, $sp, 64 move $a0, $s0 jirl $ra, $a4, 0 fadd.s $fa0, $fs5, $fs0 movfr2gr.s $a0, $fa0 - move $a1, $s4 + move $a1, $s3 bstrins.d $a1, $a0, 63, 32 - st.d $a1, $sp, 120 - st.d $s7, $sp, 128 + st.d $a1, $sp, 112 + st.d $s6, $sp, 120 fadd.s $fa0, $fs7, $fs4 fmov.s $fs7, $fs2 ld.d $a0, $s0, 0 @@ -4086,33 +4081,33 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fld.s $fs6, $sp, 24 # 4-byte Folded Reload fld.s $fs4, $sp, 32 # 4-byte Folded Reload fld.s $fs2, $sp, 40 # 4-byte Folded Reload - movfr2gr.s $s7, $fa0 + movfr2gr.s $s6, $fa0 movfr2gr.s $a1, $fa1 - move $a2, $s7 + move $a2, $s6 ld.d $a4, $a0, 40 bstrins.d $a2, $a1, 63, 32 st.d $a2, $sp, 96 - st.d $s8, $sp, 104 - addi.d $a1, $sp, 120 + st.d $s7, $sp, 104 + addi.d $a1, $sp, 112 addi.d $a2, $sp, 96 addi.d $a3, $sp, 64 move $a0, $s0 jirl $ra, $a4, 0 fadd.s $fa0, $fs3, $fs0 - bstrins.d $s4, $s5, 63, 32 + bstrins.d $s3, $s4, 63, 32 movfr2gr.s $a0, $fa0 bstrpick.d $a0, $a0, 31, 0 - st.d $s4, $sp, 120 - st.d $a0, $sp, 128 + st.d $s3, $sp, 112 + st.d $a0, $sp, 120 ld.d $a0, $s0, 0 fadd.s $fa0, $fs3, $fs1 - bstrins.d $s7, $s6, 63, 32 + bstrins.d $s6, $s5, 63, 32 movfr2gr.s $a1, $fa0 ld.d $a4, $a0, 40 bstrpick.d $a0, $a1, 31, 0 - st.d $s7, $sp, 96 + st.d $s6, $sp, 96 st.d $a0, $sp, 104 - addi.d $a1, $sp, 120 + addi.d $a1, $sp, 112 addi.d $a2, $sp, 96 addi.d $a3, $sp, 64 move $a0, $s0 @@ -4122,26 +4117,25 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod addi.d $s1, $s1, 16 blt $s2, $a0, .LBB11_3 .LBB11_4: - fld.d $fs7, $sp, 136 # 8-byte Folded Reload - fld.d $fs6, $sp, 144 # 8-byte Folded Reload - fld.d $fs5, $sp, 152 # 8-byte Folded Reload - fld.d $fs4, $sp, 160 # 8-byte Folded Reload - fld.d $fs3, $sp, 168 # 8-byte Folded Reload - fld.d $fs2, $sp, 176 # 8-byte Folded Reload - fld.d $fs1, $sp, 184 # 8-byte Folded Reload - fld.d $fs0, $sp, 192 # 8-byte Folded Reload - ld.d $s8, $sp, 200 # 8-byte Folded Reload - ld.d $s7, $sp, 208 # 8-byte Folded Reload - ld.d $s6, $sp, 216 # 8-byte Folded Reload - ld.d $s5, $sp, 224 # 8-byte Folded Reload - ld.d $s4, $sp, 232 # 8-byte Folded Reload - ld.d $s3, $sp, 240 # 8-byte Folded Reload - ld.d $s2, $sp, 248 # 8-byte Folded Reload - ld.d $s1, $sp, 256 # 8-byte Folded Reload - ld.d $s0, $sp, 264 # 8-byte Folded Reload - ld.d $fp, $sp, 272 # 8-byte Folded Reload - ld.d $ra, $sp, 280 # 8-byte Folded Reload - addi.d $sp, $sp, 288 + fld.d $fs7, $sp, 128 # 8-byte Folded Reload + fld.d 
$fs6, $sp, 136 # 8-byte Folded Reload + fld.d $fs5, $sp, 144 # 8-byte Folded Reload + fld.d $fs4, $sp, 152 # 8-byte Folded Reload + fld.d $fs3, $sp, 160 # 8-byte Folded Reload + fld.d $fs2, $sp, 168 # 8-byte Folded Reload + fld.d $fs1, $sp, 176 # 8-byte Folded Reload + fld.d $fs0, $sp, 184 # 8-byte Folded Reload + ld.d $s7, $sp, 192 # 8-byte Folded Reload + ld.d $s6, $sp, 200 # 8-byte Folded Reload + ld.d $s5, $sp, 208 # 8-byte Folded Reload + ld.d $s4, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 224 # 8-byte Folded Reload + ld.d $s2, $sp, 232 # 8-byte Folded Reload + ld.d $s1, $sp, 240 # 8-byte Folded Reload + ld.d $s0, $sp, 248 # 8-byte Folded Reload + ld.d $fp, $sp, 256 # 8-byte Folded Reload + ld.d $ra, $sp, 264 # 8-byte Folded Reload + addi.d $sp, $sp, 272 ret .Lfunc_end11: .size _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw, .Lfunc_end11-_ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw @@ -4220,17 +4214,16 @@ _ZN17btSoftBodyHelpers10CreateRopeER19btSoftBodyWorldInfoRK9btVector3S4_ii: # @_ slt $a1, $a0, $fp masknez $a0, $a0, $a1 maskeqz $a1, $fp, $a1 - or $a2, $a1, $a0 - lu12i.w $a0, 524287 - ori $a1, $a0, 4093 - lu12i.w $a0, 260096 - bgeu $a1, $s0, .LBB12_3 + lu12i.w $a2, 524287 + ori $a2, $a2, 4093 + or $a1, $a1, $a0 + bgeu $a2, $s0, .LBB12_3 # %bb.2: - move $a1, $zero + move $a0, $zero b .LBB12_6 .LBB12_3: # %vector.ph - bstrpick.d $a1, $a2, 30, 1 - slli.d $a1, $a1, 1 + bstrpick.d $a0, $a1, 30, 1 + slli.d $a0, $a0, 1 vori.b $vr7, $vr3, 0 vextrins.w $vr7, $vr3, 16 vori.b $vr8, $vr0, 0 @@ -4245,14 +4238,14 @@ _ZN17btSoftBodyHelpers10CreateRopeER19btSoftBodyWorldInfoRK9btVector3S4_ii: # @_ vextrins.w $vr12, $vr2, 16 vori.b $vr13, $vr6, 0 vextrins.w $vr13, $vr6, 16 - ori $a3, $zero, 0 - lu32i.d $a3, 1 - vreplgr2vr.d $vr14, $a3 + ori $a2, $zero, 0 + lu32i.d $a2, 1 + vreplgr2vr.d $vr14, $a2 vrepli.b $vr15, 0 - vreplgr2vr.w $vr16, $a0 - move $a3, $a1 - move $a4, $s3 - move $a5, $s2 + vldi $vr16, -1424 + move $a2, $a0 + move $a3, $s3 + move $a4, $s2 .p2align 4, , 16 .LBB12_4: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -4269,24 +4262,25 @@ _ZN17btSoftBodyHelpers10CreateRopeER19btSoftBodyWorldInfoRK9btVector3S4_ii: # @_ vpackod.d $vr19, $vr17, $vr18 vpackev.d $vr17, $vr17, $vr18 xvpermi.q $xr17, $xr19, 2 - xvst $xr17, $a5, 0 - vstelm.d $vr16, $a4, 0, 0 + xvst $xr17, $a4, 0 + vstelm.d $vr16, $a3, 0, 0 vaddi.wu $vr14, $vr14, 2 - addi.d $a5, $a5, 32 - addi.d $a3, $a3, -2 - addi.d $a4, $a4, 8 - bnez $a3, .LBB12_4 + addi.d $a4, $a4, 32 + addi.d $a2, $a2, -2 + addi.d $a3, $a3, 8 + bnez $a2, .LBB12_4 # %bb.5: # %middle.block - beq $a1, $a2, .LBB12_8 + beq $a0, $a1, .LBB12_8 .LBB12_6: # %scalar.ph.preheader - alsl.d $a3, $a1, $s3, 2 - alsl.d $a4, $a1, $s2, 4 - addi.d $a4, $a4, 8 - sub.d $a2, $a2, $a1 + alsl.d $a2, $a0, $s3, 2 + alsl.d $a3, $a0, $s2, 4 + addi.d $a3, $a3, 8 + sub.d $a1, $a1, $a0 + lu12i.w $a4, 260096 .p2align 4, , 16 .LBB12_7: # %scalar.ph # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a1, 31, 0 + bstrpick.d $a5, $a0, 31, 0 movgr2fr.d $fa7, $a5 ffint.s.l $fa7, $fa7 fdiv.s $fa7, $fa7, $fa3 @@ -4298,14 +4292,14 @@ _ZN17btSoftBodyHelpers10CreateRopeER19btSoftBodyWorldInfoRK9btVector3S4_ii: # @_ bstrins.d $a5, $a6, 63, 32 movfr2gr.s $a6, $fa7 bstrpick.d $a6, $a6, 31, 0 - st.d $a5, $a4, -8 - st.d $a6, $a4, 0 - st.w $a0, $a3, 0 - addi.w $a1, $a1, 1 - addi.d $a3, $a3, 4 - addi.d $a2, $a2, -1 - addi.d $a4, $a4, 16 - bnez $a2, .LBB12_7 + st.d $a5, $a3, -8 + st.d $a6, $a3, 0 + st.w $a4, $a2, 0 + addi.w $a0, $a0, 1 + addi.d $a2, $a2, 4 + 
addi.d $a1, $a1, -1 + addi.d $a3, $a3, 16 + bnez $a1, .LBB12_7 .LBB12_8: # %._crit_edge ori $a0, $zero, 1496 ori $a1, $zero, 16 @@ -4572,8 +4566,8 @@ _ZN17btSoftBodyHelpers11CreatePatchER19btSoftBodyWorldInfoRK9btVector3S4_S4_S4_i lu32i.d $a7, 1 vreplgr2vr.d $vr0, $a7 vrepli.b $vr17, 0 + vldi $vr18, -1424 lu12i.w $a7, 260096 - vreplgr2vr.w $vr18, $a7 b .LBB13_4 .p2align 4, , 16 .LBB13_3: # %._crit_edge.us @@ -5107,8 +5101,8 @@ _ZN17btSoftBodyHelpers13CreatePatchUVER19btSoftBodyWorldInfoRK9btVector3S4_S4_S4 lu32i.d $a7, 1 vreplgr2vr.d $vr2, $a7 vrepli.b $vr16, 0 + vldi $vr17, -1424 lu12i.w $a7, 260096 - vreplgr2vr.w $vr17, $a7 b .LBB14_4 .p2align 4, , 16 .LBB14_3: # %._crit_edge.us diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s index def011b4..122664d8 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s @@ -16,11 +16,10 @@ _ZN12GIM_BOX_TREE20_calc_splitting_axisER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12 add.d $a5, $a3, $a5 addi.d $a5, $a5, 20 sub.d $a6, $a2, $a0 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero - vldi $vr1, -1184 - lu12i.w $a7, 258048 - vreplgr2vr.w $vr0, $a7 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero + vldi $vr0, -1184 + vldi $vr3, -3265 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 fld.s $fa4, $a5, -4 @@ -28,31 +27,32 @@ _ZN12GIM_BOX_TREE20_calc_splitting_axisER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12 ld.d $a7, $a5, 0 ld.d $t0, $a5, -16 fadd.s $fa4, $fa4, $fa5 - fmul.s $fa4, $fa4, $fa1 + fmul.s $fa4, $fa4, $fa0 vinsgr2vr.d $vr5, $a7, 0 vinsgr2vr.d $vr6, $t0, 0 vfadd.s $vr5, $vr5, $vr6 - vfmul.s $vr5, $vr5, $vr0 - fadd.s $fa3, $fa3, $fa4 - vfadd.s $vr2, $vr2, $vr5 + vfmul.s $vr5, $vr5, $vr3 + fadd.s $fa2, $fa2, $fa4 + vfadd.s $vr1, $vr1, $vr5 addi.d $a6, $a6, -1 addi.d $a5, $a5, 36 bnez $a6, .LBB0_2 # %bb.3: # %.lr.ph134 bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa1, $a4 - ffint.s.l $fa1, $fa1 - frecip.s $fa5, $fa1 + movgr2fr.d $fa0, $a4 + ffint.s.l $fa0, $fa0 + frecip.s $fa4, $fa0 ld.d $a1, $a1, 0 - fmul.s $fa4, $fa5, $fa3 - vextrins.w $vr5, $vr5, 16 - vfmul.s $vr5, $vr5, $vr2 + fmul.s $fa3, $fa4, $fa2 + vextrins.w $vr4, $vr4, 16 + vfmul.s $vr4, $vr4, $vr1 add.d $a1, $a3, $a1 addi.d $a1, $a1, 20 sub.d $a0, $a2, $a0 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero - vldi $vr6, -1184 + vrepli.b $vr1, 0 + movgr2fr.w $fa2, $zero + vldi $vr5, -1184 + vldi $vr6, -3265 .p2align 4, , 16 .LBB0_4: # =>This Inner Loop Header: Depth=1 fld.s $fa7, $a1, -4 @@ -61,16 +61,16 @@ _ZN12GIM_BOX_TREE20_calc_splitting_axisER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12 fadd.s $fa7, $fa7, $ft0 vinsgr2vr.d $vr8, $a2, 0 ld.d $a2, $a1, -16 - fmul.s $fa7, $fa7, $fa6 - fsub.s $fa7, $fa7, $fa4 + fmul.s $fa7, $fa7, $fa5 + fsub.s $fa7, $fa7, $fa3 fmul.s $fa7, $fa7, $fa7 vinsgr2vr.d $vr9, $a2, 0 vfadd.s $vr8, $vr8, $vr9 - vfmul.s $vr8, $vr8, $vr0 - vfsub.s $vr8, $vr8, $vr5 + vfmul.s $vr8, $vr8, $vr6 + vfsub.s $vr8, $vr8, $vr4 vfmul.s $vr8, $vr8, $vr8 - fadd.s $fa3, $fa3, $fa7 - vfadd.s $vr2, $vr2, $vr8 + fadd.s $fa2, $fa2, $fa7 + vfadd.s $vr1, $vr1, $vr8 addi.d $a0, $a0, -1 addi.d $a1, $a1, 36 bnez $a0, .LBB0_4 @@ -78,20 +78,20 @@ _ZN12GIM_BOX_TREE20_calc_splitting_axisER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12 .LBB0_5: # %._crit_edge bstrpick.d $a0, $a4, 31, 0 movgr2fr.d $fa0, $a0 - ffint.s.l $fa1, $fa0 - vrepli.b $vr2, 0 - movgr2fr.w $fa3, $zero + ffint.s.l $fa0, $fa0 + vrepli.b 
$vr1, 0 + movgr2fr.w $fa2, $zero .LBB0_6: # %._crit_edge135 - vldi $vr0, -1040 - fadd.s $fa0, $fa1, $fa0 + vldi $vr3, -1040 + fadd.s $fa0, $fa0, $fa3 frecip.s $fa0, $fa0 - fmul.s $fa1, $fa0, $fa3 - vreplvei.w $vr3, $vr2, 0 + fmul.s $fa2, $fa0, $fa2 + vreplvei.w $vr3, $vr1, 0 fmul.s $fa3, $fa0, $fa3 - vreplvei.w $vr2, $vr2, 1 - fmul.s $fa0, $fa0, $fa2 - fcmp.clt.s $fcc0, $fa1, $fa3 - fsel $fa1, $fa1, $fa3, $fcc0 + vreplvei.w $vr1, $vr1, 1 + fmul.s $fa0, $fa0, $fa1 + fcmp.clt.s $fcc0, $fa2, $fa3 + fsel $fa1, $fa2, $fa3, $fcc0 fcmp.clt.s $fcc1, $fa1, $fa0 movcf2gr $a0, $fcc0 movcf2gr $a1, $fcc1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s index 8607ad98..e4c159ed 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s @@ -160,16 +160,14 @@ _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_c addi.d $a4, $a0, 32 addi.d $a5, $a2, 192 ori $a6, $zero, 7 - lu12i.w $a7, 260096 - xvreplgr2vr.w $xr0, $a7 lu12i.w $a7, 280480 - xvreplgr2vr.w $xr1, $a7 + xvreplgr2vr.w $xr0, $a7 + xvldi $xr1, -1424 lu12i.w $a7, 281194 xvreplgr2vr.w $xr2, $a7 - lu12i.w $a7, 263168 - xvreplgr2vr.w $xr3, $a7 lu12i.w $a7, 282709 - xvreplgr2vr.w $xr4, $a7 + xvreplgr2vr.w $xr3, $a7 + xvldi $xr4, -1528 move $a7, $a1 .p2align 4, , 16 .LBB0_18: # %vector.body @@ -196,7 +194,7 @@ _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_c xvinsve0.w $xr5, $xr10, 5 xvinsve0.w $xr5, $xr11, 6 xvinsve0.w $xr5, $xr12, 7 - xvfmadd.s $xr5, $xr5, $xr1, $xr0 + xvfmadd.s $xr5, $xr5, $xr0, $xr1 xvftintrz.w.s $xr5, $xr5 fld.s $fa6, $a5, -188 fld.s $fa7, $a5, -140 @@ -230,7 +228,7 @@ _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_c xvinsve0.w $xr7, $xr12, 5 xvinsve0.w $xr7, $xr13, 6 xvinsve0.w $xr7, $xr14, 7 - xvfmadd.s $xr7, $xr7, $xr4, $xr3 + xvfmadd.s $xr7, $xr7, $xr3, $xr4 xvftintrz.w.s $xr7, $xr7 xvslli.w $xr6, $xr6, 4 xvadd.w $xr5, $xr6, $xr5 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s index 17716a5b..58821e12 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s @@ -3331,8 +3331,7 @@ _ZN4Mesh15partition_cellsEiRSt6vectorIiSaIiEE16partition_method: # @_ZN4Mesh15pa xvreplve0.d $xr5, $xr1 xvreplve0.d $xr6, $xr2 xvreplve0.d $xr7, $xr3 - lu52i.d $a5, $zero, 1022 - xvreplgr2vr.d $xr8, $a5 + xvldi $xr8, -928 move $a5, $a0 move $a6, $a1 move $a7, $s3 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s index 34737b33..7e471110 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s @@ -1814,20 +1814,20 @@ _ZN5Hydro12resetDtHydroEv: # @_ZN5Hydro12resetDtHydroEv _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd .cfi_startproc # %bb.0: - addi.d $sp, $sp, -448 - .cfi_def_cfa_offset 448 - st.d $ra, $sp, 440 # 8-byte Folded Spill - st.d $fp, $sp, 432 # 8-byte Folded Spill - st.d $s0, $sp, 424 # 
8-byte Folded Spill - st.d $s1, $sp, 416 # 8-byte Folded Spill - st.d $s2, $sp, 408 # 8-byte Folded Spill - st.d $s3, $sp, 400 # 8-byte Folded Spill - st.d $s4, $sp, 392 # 8-byte Folded Spill - st.d $s5, $sp, 384 # 8-byte Folded Spill - st.d $s6, $sp, 376 # 8-byte Folded Spill - st.d $s7, $sp, 368 # 8-byte Folded Spill - st.d $s8, $sp, 360 # 8-byte Folded Spill - fst.d $fs0, $sp, 352 # 8-byte Folded Spill + addi.d $sp, $sp, -432 + .cfi_def_cfa_offset 432 + st.d $ra, $sp, 424 # 8-byte Folded Spill + st.d $fp, $sp, 416 # 8-byte Folded Spill + st.d $s0, $sp, 408 # 8-byte Folded Spill + st.d $s1, $sp, 400 # 8-byte Folded Spill + st.d $s2, $sp, 392 # 8-byte Folded Spill + st.d $s3, $sp, 384 # 8-byte Folded Spill + st.d $s4, $sp, 376 # 8-byte Folded Spill + st.d $s5, $sp, 368 # 8-byte Folded Spill + st.d $s6, $sp, 360 # 8-byte Folded Spill + st.d $s7, $sp, 352 # 8-byte Folded Spill + st.d $s8, $sp, 344 # 8-byte Folded Spill + fst.d $fs0, $sp, 336 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1844,45 +1844,45 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a0, $a0, 0 ld.w $s0, $a0, 504 ld.w $a1, $a0, 400 - st.d $a1, $sp, 224 # 8-byte Folded Spill + st.d $a1, $sp, 208 # 8-byte Folded Spill ld.d $a1, $a0, 240 - st.d $a1, $sp, 256 # 8-byte Folded Spill + st.d $a1, $sp, 240 # 8-byte Folded Spill ld.d $a1, $a0, 248 - st.d $a1, $sp, 120 # 8-byte Folded Spill - ld.d $a1, $a0, 256 st.d $a1, $sp, 112 # 8-byte Folded Spill - ld.d $a1, $a0, 296 + ld.d $a1, $a0, 256 st.d $a1, $sp, 104 # 8-byte Folded Spill - ld.d $a1, $a0, 304 + ld.d $a1, $a0, 296 st.d $a1, $sp, 96 # 8-byte Folded Spill - ld.d $a1, $a0, 312 + ld.d $a1, $a0, 304 st.d $a1, $sp, 88 # 8-byte Folded Spill + ld.d $a1, $a0, 312 + st.d $a1, $sp, 80 # 8-byte Folded Spill ld.d $a1, $a0, 320 - st.d $a1, $sp, 216 # 8-byte Folded Spill + st.d $a1, $sp, 200 # 8-byte Folded Spill ld.d $a1, $a0, 328 - st.d $a1, $sp, 192 # 8-byte Folded Spill + st.d $a1, $sp, 160 # 8-byte Folded Spill ld.d $a1, $a0, 336 - st.d $a1, $sp, 80 # 8-byte Folded Spill + st.d $a1, $sp, 72 # 8-byte Folded Spill ld.d $s8, $a0, 344 ld.d $a1, $a0, 352 - st.d $a1, $sp, 304 # 8-byte Folded Spill + st.d $a1, $sp, 288 # 8-byte Folded Spill ld.d $a1, $a0, 360 - st.d $a1, $sp, 344 # 8-byte Folded Spill + st.d $a1, $sp, 328 # 8-byte Folded Spill ld.d $a1, $a0, 368 - st.d $a1, $sp, 160 # 8-byte Folded Spill + st.d $a1, $sp, 152 # 8-byte Folded Spill ld.d $a1, $a0, 376 - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill ld.d $t7, $a0, 288 ld.d $s4, $a0, 264 ld.d $a1, $a0, 272 - st.d $a1, $sp, 64 # 8-byte Folded Spill - ld.d $a1, $a0, 280 st.d $a1, $sp, 56 # 8-byte Folded Spill + ld.d $a1, $a0, 280 + st.d $a1, $sp, 48 # 8-byte Folded Spill ld.d $s3, $a0, 384 ld.d $a1, $a0, 392 - st.d $a1, $sp, 208 # 8-byte Folded Spill + st.d $a1, $sp, 192 # 8-byte Folded Spill # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 288 # 16-byte Folded Spill + vst $vr0, $sp, 272 # 16-byte Folded Spill blez $s0, .LBB5_11 # %bb.1: # %.lr.ph move $a1, $zero @@ -1891,7 +1891,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a4, $fp, 248 ld.d $a5, $fp, 256 vldi $vr0, -928 - vld $vr1, $sp, 288 # 16-byte Folded Reload + vld $vr1, $sp, 272 # 16-byte Folded Reload fmul.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 ori $a6, $zero, 1 @@ -1916,7 +1916,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd alsl.d $t4, $t1, $t7, 4 sub.d $t3, $t2, $t1 addi.d $t3, $t3, 1 - ld.d $t5, $sp, 256 # 8-byte Folded Reload + ld.d $t5, $sp, 240 # 8-byte Folded 
Reload add.d $t5, $t5, $t0 move $t6, $t3 .p2align 4, , 16 @@ -1967,22 +1967,22 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bnez $a7, .LBB5_10 b .LBB5_2 .LBB5_11: # %.preheader - st.d $t7, $sp, 48 # 8-byte Folded Spill - ld.d $a1, $sp, 224 # 8-byte Folded Reload - st.d $s0, $sp, 128 # 8-byte Folded Spill + st.d $t7, $sp, 40 # 8-byte Folded Spill + ld.d $a1, $sp, 208 # 8-byte Folded Reload + st.d $s0, $sp, 120 # 8-byte Folded Spill blez $a1, .LBB5_35 # %bb.12: # %.lr.ph221 move $a4, $zero - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload addi.d $a0, $a0, 32 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill b .LBB5_14 .p2align 4, , 16 .LBB5_13: # %_ZN5Hydro12sumCrnrForceEPK7double2S2_S2_PS0_ii.exit # in Loop: Header=BB5_14 Depth=1 - ld.d $a4, $sp, 152 # 8-byte Folded Reload + ld.d $a4, $sp, 144 # 8-byte Folded Reload addi.d $a4, $a4, 1 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload beq $a4, $a0, .LBB5_34 .LBB5_14: # =>This Loop Header: Depth=1 # Child Loop BB5_30 Depth 2 @@ -1993,21 +1993,21 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a1, $a0, 408 ld.d $a2, $a0, 456 ld.d $a3, $a0, 480 - st.d $a4, $sp, 152 # 8-byte Folded Spill + st.d $a4, $sp, 144 # 8-byte Folded Spill slli.d $a4, $a4, 2 ld.d $a5, $a0, 432 ldx.w $s6, $a2, $a4 ldx.w $a6, $a3, $a4 ldx.w $s0, $a1, $a4 ldx.w $s7, $a5, $a4 - ld.d $a1, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload alsl.d $a1, $s6, $a1, 3 sub.d $a4, $a6, $s6 - ld.d $a2, $sp, 344 # 8-byte Folded Reload + ld.d $a2, $sp, 328 # 8-byte Folded Reload alsl.d $a3, $s6, $a2, 3 ori $a2, $zero, 2 - st.d $a6, $sp, 144 # 8-byte Folded Spill - st.d $a4, $sp, 136 # 8-byte Folded Spill + st.d $a6, $sp, 136 # 8-byte Folded Spill + st.d $a4, $sp, 128 # 8-byte Folded Spill blt $a4, $a2, .LBB5_32 # %bb.15: # in Loop: Header=BB5_14 Depth=1 slli.d $a2, $a4, 3 @@ -2018,9 +2018,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd .LBB5_16: # %_ZSt4copyIPdS0_ET0_T_S2_S1_.exit # in Loop: Header=BB5_14 Depth=1 move $a1, $s4 - ld.d $s2, $sp, 64 # 8-byte Folded Reload + ld.d $s2, $sp, 56 # 8-byte Folded Reload move $a2, $s2 - ld.d $s1, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 48 # 8-byte Folded Reload move $a3, $s1 move $a4, $s0 move $a5, $s7 @@ -2030,32 +2030,32 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd st.d $s7, $sp, 0 move $a1, $s4 move $a2, $s1 - ld.d $s5, $sp, 192 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload move $a3, $s5 - ld.d $a4, $sp, 80 # 8-byte Folded Reload + ld.d $a4, $sp, 72 # 8-byte Folded Reload move $a5, $s8 - ld.d $a6, $sp, 304 # 8-byte Folded Reload + ld.d $a6, $sp, 288 # 8-byte Folded Reload move $a7, $s0 pcaddu18i $ra, %call36(_ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 move $a1, $s1 move $a2, $s2 - ld.d $a3, $sp, 160 # 8-byte Folded Reload + ld.d $a3, $sp, 152 # 8-byte Folded Reload move $a4, $s0 move $a5, $s7 pcaddu18i $ra, %call36(_ZN4Mesh12calcSurfVecsEPK7double2S2_PS0_ii) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 move $a1, $s4 - ld.d $a2, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload move $a3, $s0 move $a4, $s7 pcaddu18i $ra, %call36(_ZN4Mesh11calcEdgeLenEPK7double2Pdii) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 move $a1, $s5 - ld.d $a2, $sp, 208 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload move $a3, $s0 move $a4, $s7 pcaddu18i $ra, %call36(_ZN4Mesh11calcCharLenEPKdPdii) @@ -2063,12 +2063,12 @@ _ZN5Hydro7doCycleEd: # 
@_ZN5Hydro7doCycleEd ld.d $a6, $fp, 296 ld.d $a0, $fp, 312 vldi $vr4, -928 - ld.d $t1, $sp, 144 # 8-byte Folded Reload + ld.d $t1, $sp, 136 # 8-byte Folded Reload bge $s6, $t1, .LBB5_21 # %bb.17: # %.lr.ph.i166.preheader # in Loop: Header=BB5_14 Depth=1 ori $a1, $zero, 8 - ld.d $t0, $sp, 136 # 8-byte Folded Reload + ld.d $t0, $sp, 128 # 8-byte Folded Reload bgeu $t0, $a1, .LBB5_27 .LBB5_18: # in Loop: Header=BB5_14 Depth=1 move $a1, $s6 @@ -2076,7 +2076,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd # in Loop: Header=BB5_14 Depth=1 sub.d $a2, $t1, $a1 alsl.d $a3, $a1, $a0, 3 - ld.d $a4, $sp, 304 # 8-byte Folded Reload + ld.d $a4, $sp, 288 # 8-byte Folded Reload alsl.d $a4, $a1, $a4, 3 alsl.d $a1, $a1, $a6, 3 .p2align 4, , 16 @@ -2139,16 +2139,16 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd st.d $t1, $sp, 16 st.d $s6, $sp, 8 st.d $a2, $sp, 0 - ld.d $a2, $sp, 304 # 8-byte Folded Reload - ld.d $a3, $sp, 344 # 8-byte Folded Reload - vld $vr0, $sp, 288 # 16-byte Folded Reload + ld.d $a2, $sp, 288 # 8-byte Folded Reload + ld.d $a3, $sp, 328 # 8-byte Folded Reload + vld $vr0, $sp, 272 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 pcaddu18i $ra, %call36(_ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii) jirl $ra, $ra, 0 ld.d $a0, $fp, 8 ld.d $a1, $fp, 352 ld.d $a3, $fp, 376 - ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload move $a2, $s1 move $a4, $s0 move $a5, $s7 @@ -2161,7 +2161,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd st.d $s7, $sp, 8 st.d $s0, $sp, 0 move $a1, $s8 - ld.d $a4, $sp, 192 # 8-byte Folded Reload + ld.d $a4, $sp, 160 # 8-byte Folded Reload move $a5, $s3 move $a6, $s1 pcaddu18i $ra, %call36(_ZN3TTS9calcForceEPKdS1_S1_S1_S1_PK7double2PS2_ii) @@ -2220,7 +2220,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bltu $a1, $a3, .LBB5_18 # %bb.28: # %vector.memcheck # in Loop: Header=BB5_14 Depth=1 - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 288 # 8-byte Folded Reload sub.d $a2, $a0, $a1 move $a1, $s6 bltu $a2, $a3, .LBB5_19 @@ -2233,7 +2233,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd addi.d $a3, $a3, 32 alsl.d $a4, $s6, $a0, 3 addi.d $a4, $a4, 32 - ld.d $a5, $sp, 40 # 8-byte Folded Reload + ld.d $a5, $sp, 32 # 8-byte Folded Reload alsl.d $a5, $s6, $a5, 3 move $a7, $a2 .p2align 4, , 16 @@ -2266,7 +2266,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd b .LBB5_16 .LBB5_34: # %._crit_edge.loopexit ld.d $a0, $fp, 0 - ld.d $s0, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 120 # 8-byte Folded Reload .LBB5_35: # %._crit_edge pcaddu18i $ra, %call36(_ZN4Mesh13checkBadSidesEv) jirl $ra, $ra, 0 @@ -2286,26 +2286,23 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd move $s0, $zero ld.d $a1, $fp, 40 ld.d $a0, $fp, 32 - vld $vr0, $sp, 288 # 16-byte Folded Reload + vld $vr0, $sp, 272 # 16-byte Folded Reload vreplvei.d $vr6, $vr0, 0 ori $s1, $zero, 4 fld.d $fs0, $s2, %pc_lo12(.LCPI5_1) - lu52i.d $a2, $zero, 1022 - vreplgr2vr.d $vr7, $a2 lu12i.w $a2, 322956 ori $a2, $a2, 830 lu32i.d $a2, 98173 lu52i.d $a2, $a2, 694 xvreplgr2vr.d $xr8, $a2 - vst $vr6, $sp, 304 # 16-byte Folded Spill - vst $vr7, $sp, 192 # 16-byte Folded Spill + vst $vr6, $sp, 288 # 16-byte Folded Spill xvst $xr8, $sp, 160 # 32-byte Folded Spill b .LBB5_38 .p2align 4, , 16 .LBB5_37: # %_ZN5Hydro10advPosFullEPK7double2S2_S2_dPS0_S3_ii.exit # in Loop: Header=BB5_38 Depth=1 addi.d $s0, $s0, 1 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload beq $s0, $a2, .LBB5_54 .LBB5_38: # =>This Loop Header: Depth=1 # 
Child Loop BB5_40 Depth 2 @@ -2345,16 +2342,16 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bltu $s3, $a2, .LBB5_40 # %bb.41: # %._crit_edge225 # in Loop: Header=BB5_38 Depth=1 - vld $vr6, $sp, 304 # 16-byte Folded Reload - vld $vr7, $sp, 192 # 16-byte Folded Reload + vld $vr6, $sp, 288 # 16-byte Folded Reload + vldi $vr7, -928 xvld $xr8, $sp, 160 # 32-byte Folded Reload bge $s5, $s6, .LBB5_37 b .LBB5_43 .p2align 4, , 16 .LBB5_42: # in Loop: Header=BB5_38 Depth=1 move $a1, $a0 - vld $vr6, $sp, 304 # 16-byte Folded Reload - vld $vr7, $sp, 192 # 16-byte Folded Reload + vld $vr6, $sp, 288 # 16-byte Folded Reload + vldi $vr7, -928 xvld $xr8, $sp, 160 # 32-byte Folded Reload bge $s5, $s6, .LBB5_37 .LBB5_43: # %.cont.preheader.i @@ -2398,9 +2395,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd # in Loop: Header=BB5_38 Depth=1 ld.d $a6, $fp, 248 ld.d $a7, $fp, 256 - ld.d $a4, $sp, 256 # 8-byte Folded Reload + ld.d $a4, $sp, 240 # 8-byte Folded Reload alsl.d $a4, $s5, $a4, 4 - ld.d $a5, $sp, 48 # 8-byte Folded Reload + ld.d $a5, $sp, 40 # 8-byte Folded Reload alsl.d $a5, $s5, $a5, 4 alsl.d $a6, $s5, $a6, 4 alsl.d $a7, $s5, $a7, 4 @@ -2511,19 +2508,19 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a0, $a0, 6 st.d $a1, $fp, 168 st.d $a0, $fp, 174 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload blez $a0, .LBB5_62 # %bb.55: # %.lr.ph232 move $s5, $zero vldi $vr0, -800 - vld $vr1, $sp, 288 # 16-byte Folded Reload + vld $vr1, $sp, 272 # 16-byte Folded Reload fmul.d $fs0, $fa1, $fa0 b .LBB5_57 .p2align 4, , 16 .LBB5_56: # %_ZN5Hydro8calcWorkEPK7double2S2_S2_S2_S2_dPdS3_ii.exit # in Loop: Header=BB5_57 Depth=1 addi.d $s5, $s5, 1 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload beq $s5, $a0, .LBB5_62 .LBB5_57: # =>This Loop Header: Depth=1 # Child Loop BB5_61 Depth 2 @@ -2537,10 +2534,10 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ldx.w $s1, $a3, $a2 ldx.w $s3, $a4, $a2 ldx.w $s7, $a5, $a2 - ld.d $s6, $sp, 256 # 8-byte Folded Reload + ld.d $s6, $sp, 240 # 8-byte Folded Reload move $a1, $s6 - ld.d $a2, $sp, 120 # 8-byte Folded Reload - ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload move $a3, $s8 move $a4, $s0 move $a5, $s1 @@ -2550,10 +2547,10 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd st.d $s1, $sp, 0 move $a1, $s6 move $a2, $s8 - ld.d $a3, $sp, 104 # 8-byte Folded Reload - ld.d $a4, $sp, 96 # 8-byte Folded Reload - ld.d $a5, $sp, 88 # 8-byte Folded Reload - ld.d $a6, $sp, 216 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a4, $sp, 88 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload + ld.d $a6, $sp, 200 # 8-byte Folded Reload move $a7, $s0 pcaddu18i $ra, %call36(_ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii) jirl $ra, $ra, 0 @@ -2651,16 +2648,16 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd jirl $ra, $ra, 0 ld.d $a0, $fp, 0 ld.w $a1, $a0, 560 - ld.d $s3, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 200 # 8-byte Folded Reload blez $a1, .LBB5_92 # %bb.63: # %.lr.ph236 move $s0, $zero - vld $vr0, $sp, 288 # 16-byte Folded Reload + vld $vr0, $sp, 272 # 16-byte Folded Reload frecip.d $ft0, $fa0 xvreplve0.d $xr0, $xr8 - xvst $xr0, $sp, 256 # 32-byte Folded Spill + xvst $xr0, $sp, 240 # 32-byte Folded Spill addi.d $s1, $s3, 32 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload addi.d $s4, $a1, 32 ori $s5, $zero, 8 ori $s6, $zero, 64 @@ -2669,21 +2666,21 @@ 
_ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd lu32i.d $a1, 98173 lu52i.d $a1, $a1, 694 xvreplgr2vr.d $xr0, $a1 - xvst $xr0, $sp, 224 # 32-byte Folded Spill - xvst $xr8, $sp, 304 # 32-byte Folded Spill + xvst $xr0, $sp, 208 # 32-byte Folded Spill + xvst $xr8, $sp, 288 # 32-byte Folded Spill b .LBB5_65 .p2align 4, , 16 .LBB5_64: # %_ZN5Hydro7calcRhoEPKdS1_Pdii.exit213 # in Loop: Header=BB5_65 Depth=1 move $a0, $fp - ld.d $a1, $sp, 208 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload move $a2, $s3 - ld.d $a3, $sp, 344 # 8-byte Folded Reload - vld $vr0, $sp, 288 # 16-byte Folded Reload + ld.d $a3, $sp, 328 # 8-byte Folded Reload + vld $vr0, $sp, 272 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 pcaddu18i $ra, %call36(_ZN5Hydro11calcDtHydroEPKdS1_S1_dii) jirl $ra, $ra, 0 - xvld $xr8, $sp, 304 # 32-byte Folded Reload + xvld $xr8, $sp, 288 # 32-byte Folded Reload ld.d $a0, $fp, 0 ld.w $a1, $a0, 560 addi.d $s0, $s0, 1 @@ -2708,7 +2705,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a3, $fp, 344 sub.d $a0, $a5, $a4 move $a6, $a4 - xvld $xr9, $sp, 256 # 32-byte Folded Reload + xvld $xr9, $sp, 240 # 32-byte Folded Reload bgeu $a0, $s5, .LBB5_75 .LBB5_67: # %.lr.ph.i197.preheader # in Loop: Header=BB5_65 Depth=1 @@ -2716,7 +2713,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd alsl.d $a3, $a6, $a3, 3 alsl.d $a2, $a6, $a2, 3 alsl.d $a1, $a6, $a1, 3 - ld.d $t0, $sp, 344 # 8-byte Folded Reload + ld.d $t0, $sp, 328 # 8-byte Folded Reload alsl.d $t0, $a6, $t0, 3 alsl.d $a6, $a6, $s3, 3 .p2align 4, , 16 @@ -2799,7 +2796,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bltu $a7, $s6, .LBB5_67 # %bb.76: # %vector.memcheck344 # in Loop: Header=BB5_65 Depth=1 - ld.d $a6, $sp, 344 # 8-byte Folded Reload + ld.d $a6, $sp, 328 # 8-byte Folded Reload sub.d $a7, $a3, $a6 move $a6, $a4 bltu $a7, $s6, .LBB5_67 @@ -2881,7 +2878,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd alsl.d $t2, $a4, $a1, 3 addi.d $t2, $t2, 32 move $t3, $a7 - xvld $xr4, $sp, 224 # 32-byte Folded Reload + xvld $xr4, $sp, 208 # 32-byte Folded Reload .p2align 4, , 16 .LBB5_85: # %vector.body333 # Parent Loop BB5_65 Depth=1 @@ -2949,19 +2946,19 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd beq $a0, $a6, .LBB5_64 b .LBB5_73 .LBB5_92: # %._crit_edge237 - fld.d $fs0, $sp, 352 # 8-byte Folded Reload - ld.d $s8, $sp, 360 # 8-byte Folded Reload - ld.d $s7, $sp, 368 # 8-byte Folded Reload - ld.d $s6, $sp, 376 # 8-byte Folded Reload - ld.d $s5, $sp, 384 # 8-byte Folded Reload - ld.d $s4, $sp, 392 # 8-byte Folded Reload - ld.d $s3, $sp, 400 # 8-byte Folded Reload - ld.d $s2, $sp, 408 # 8-byte Folded Reload - ld.d $s1, $sp, 416 # 8-byte Folded Reload - ld.d $s0, $sp, 424 # 8-byte Folded Reload - ld.d $fp, $sp, 432 # 8-byte Folded Reload - ld.d $ra, $sp, 440 # 8-byte Folded Reload - addi.d $sp, $sp, 448 + fld.d $fs0, $sp, 336 # 8-byte Folded Reload + ld.d $s8, $sp, 344 # 8-byte Folded Reload + ld.d $s7, $sp, 352 # 8-byte Folded Reload + ld.d $s6, $sp, 360 # 8-byte Folded Reload + ld.d $s5, $sp, 368 # 8-byte Folded Reload + ld.d $s4, $sp, 376 # 8-byte Folded Reload + ld.d $s3, $sp, 384 # 8-byte Folded Reload + ld.d $s2, $sp, 392 # 8-byte Folded Reload + ld.d $s1, $sp, 400 # 8-byte Folded Reload + ld.d $s0, $sp, 408 # 8-byte Folded Reload + ld.d $fp, $sp, 416 # 8-byte Folded Reload + ld.d $ra, $sp, 424 # 8-byte Folded Reload + addi.d $sp, $sp, 432 ret .Lfunc_end5: .size _ZN5Hydro7doCycleEd, .Lfunc_end5-_ZN5Hydro7doCycleEd @@ -3303,8 +3300,7 @@ _ZN5Hydro10advPosFullEPK7double2S2_S2_dPS0_S3_ii: # 
@_ZN5Hydro10advPosFullEPK7do alsl.d $a1, $a6, $a1, 4 alsl.d $a4, $a6, $a4, 4 sub.d $a5, $a7, $a6 - lu52i.d $a6, $zero, 1022 - vreplgr2vr.d $vr1, $a6 + vldi $vr1, -928 .p2align 4, , 16 .LBB11_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s index c4d32f68..e8532e37 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s @@ -3762,8 +3762,7 @@ _ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii: # @_ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii alsl.d $a2, $s5, $a2, 2 alsl.d $a3, $s5, $a3, 2 sub.d $a4, $s4, $s5 - lu52i.d $a5, $zero, 1022 - vreplgr2vr.d $vr0, $a5 + vldi $vr0, -928 .p2align 4, , 16 .LBB9_4: # =>This Inner Loop Header: Depth=1 ld.w $a5, $a1, 0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s index 58129c8d..400aad02 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s @@ -373,10 +373,8 @@ _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii: # @_ZN7PolyGas15calcSta bstrins.d $t2, $zero, 1, 0 add.d $a7, $t2, $ra xvreplve0.d $xr0, $xr10 - lu52i.d $t3, $zero, 1023 - xvreplgr2vr.d $xr1, $t3 - lu52i.d $t3, $zero, 1022 - xvreplgr2vr.d $xr2, $t3 + xvldi $xr1, -912 + xvldi $xr2, -928 move $t3, $t2 move $t4, $a0 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s index 7eb3009e..34a77ccd 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s @@ -9650,8 +9650,7 @@ _ZN6miniFE16impose_dirichletINS_9CSRMatrixIdiiEENS_6VectorIdiiEEEEvNT_10ScalarTy st.d $a1, $sp, 8 # 8-byte Folded Spill vldi $vr4, -912 movgr2fr.d $fs1, $zero - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr5, $a1 + xvldi $xr5, -912 xvst $xr5, $sp, 48 # 32-byte Folded Spill b .LBB12_5 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s index c24f2259..bd87ffea 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s @@ -533,14 +533,10 @@ attenuate_fluxes: # @attenuate_fluxes slli.d $a1, $a1, 3 xvreplve0.w $xr0, $xr1 xvreplve0.w $xr3, $xr2 - lu12i.w $a2, -261120 - xvreplgr2vr.w $xr4, $a2 - lu12i.w $a2, 265216 - xvreplgr2vr.w $xr6, $a2 - lu12i.w $a2, -259072 - xvreplgr2vr.w $xr7, $a2 - lu12i.w $a2, 263168 - xvreplgr2vr.w $xr8, $a2 + xvldi $xr4, -1400 + xvldi $xr6, -1512 + xvldi $xr7, -1384 + xvldi $xr8, -1528 move $a2, $t6 move $s2, $t4 move $s3, $t7 @@ -897,7 +893,6 @@ attenuate_fluxes: # @attenuate_fluxes bltu $a5, $a2, .LBB0_95 # %bb.119: # %vector.main.loop.iter.check907 ori $a1, $zero, 16 - lu12i.w $a2, 260096 bgeu $a7, $a1, .LBB0_195 # %bb.120: move $a1, $zero 
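[Editor's annotation — not part of the generated patch above or below.] The substitution recurring in these hunks replaces a two-instruction single-precision splat (materialize the bit pattern in a GPR with lu12i.w, then broadcast it with vreplgr2vr.w / xvreplgr2vr.w) by a single vldi / xvldi vector-load-immediate. A minimal C sketch of why the immediate 260096 seen throughout denotes 1.0f — assuming only that lu12i.w places its 20-bit immediate into bits 31..12 of the destination:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    uint32_t bits = 260096u << 12;     /* lu12i.w $a0, 260096  ->  0x3F800000 */
    float f;
    memcpy(&f, &bits, sizeof f);       /* reinterpret the bits as IEEE-754 single */
    printf("0x%08X = %g\n", bits, f);  /* prints: 0x3F800000 = 1 */
    return 0;
}

Per the substitutions shown in these diffs, vldi/xvldi with immediate -1424 therefore splats 1.0f into every lane; the other negative immediates appearing here decode the same way (-3265 for 0.5f from lu12i.w 258048, -3136 for -2.0f from lu12i.w -262144, and so on).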
@@ -978,8 +973,7 @@ attenuate_fluxes: # @attenuate_fluxes xvreplve0.w $xr9, $xr7 xvreplve0.w $xr10, $xr8 xvreplve0.w $xr11, $xr6 - lu12i.w $t1, -262144 - xvreplgr2vr.w $xr12, $t1 + xvldi $xr12, -3136 move $t1, $t8 move $s1, $s3 move $s6, $s4 @@ -1066,8 +1060,7 @@ attenuate_fluxes: # @attenuate_fluxes xvreplve0.w $xr9, $xr7 xvreplve0.w $xr10, $xr8 xvreplve0.w $xr11, $xr6 - lu12i.w $a5, -262144 - xvreplgr2vr.w $xr12, $a5 + xvldi $xr12, -3136 move $t1, $t8 move $s1, $s3 move $s6, $s4 @@ -1154,8 +1147,7 @@ attenuate_fluxes: # @attenuate_fluxes xvreplve0.w $xr9, $xr7 xvreplve0.w $xr10, $xr8 xvreplve0.w $xr11, $xr6 - lu12i.w $a5, -262144 - xvreplgr2vr.w $xr12, $a5 + xvldi $xr12, -3136 move $t1, $t8 move $s1, $s3 move $s6, $s4 @@ -1360,56 +1352,56 @@ attenuate_fluxes: # @attenuate_fluxes .LBB0_195: # %vector.ph908 bstrpick.d $a1, $t8, 30, 4 slli.d $a1, $a1, 4 - addi.d $a5, $s2, 32 - addi.d $a6, $t0, 32 - addi.d $t1, $a0, 32 - xvreplgr2vr.w $xr0, $a2 - move $t2, $a1 + addi.d $a2, $s2, 32 + addi.d $a5, $t0, 32 + addi.d $a6, $a0, 32 + xvldi $xr0, -1424 + move $t1, $a1 .p2align 4, , 16 .LBB0_196: # %vector.body911 # =>This Inner Loop Header: Depth=1 - xvld $xr1, $a6, -32 - xvld $xr2, $a6, 0 - xvld $xr3, $t1, -32 - xvld $xr4, $t1, 0 + xvld $xr1, $a5, -32 + xvld $xr2, $a5, 0 + xvld $xr3, $a6, -32 + xvld $xr4, $a6, 0 xvfsub.s $xr1, $xr0, $xr1 xvfsub.s $xr2, $xr0, $xr2 xvfmul.s $xr1, $xr3, $xr1 xvfmul.s $xr2, $xr4, $xr2 - xvst $xr1, $a5, -32 - xvst $xr2, $a5, 0 - addi.d $t2, $t2, -16 + xvst $xr1, $a2, -32 + xvst $xr2, $a2, 0 + addi.d $t1, $t1, -16 + addi.d $a2, $a2, 64 addi.d $a5, $a5, 64 addi.d $a6, $a6, 64 - addi.d $t1, $t1, 64 - bnez $t2, .LBB0_196 + bnez $t1, .LBB0_196 # %bb.197: # %middle.block918 beq $a1, $t8, .LBB0_97 # %bb.198: # %vec.epilog.iter.check923 - andi $a5, $t8, 12 - beqz $a5, .LBB0_95 + andi $a2, $t8, 12 + beqz $a2, .LBB0_95 .LBB0_199: # %vec.epilog.ph922 - move $t2, $a1 + move $t1, $a1 bstrpick.d $a1, $t8, 30, 2 slli.d $a1, $a1, 2 - sub.d $a5, $t2, $a1 - alsl.d $a6, $t2, $s2, 2 - alsl.d $t1, $t2, $t0, 2 - alsl.d $t2, $t2, $a0, 2 - vreplgr2vr.w $vr0, $a2 + sub.d $a2, $t1, $a1 + alsl.d $a5, $t1, $s2, 2 + alsl.d $a6, $t1, $t0, 2 + alsl.d $t1, $t1, $a0, 2 + vldi $vr0, -1424 .p2align 4, , 16 .LBB0_200: # %vec.epilog.vector.body928 # =>This Inner Loop Header: Depth=1 - vld $vr1, $t1, 0 - vld $vr2, $t2, 0 + vld $vr1, $a6, 0 + vld $vr2, $t1, 0 vfsub.s $vr1, $vr0, $vr1 vfmul.s $vr1, $vr2, $vr1 - vst $vr1, $a6, 0 - addi.d $a5, $a5, 4 + vst $vr1, $a5, 0 + addi.d $a2, $a2, 4 + addi.d $a5, $a5, 16 addi.d $a6, $a6, 16 addi.d $t1, $t1, 16 - addi.d $t2, $t2, 16 - bnez $a5, .LBB0_200 + bnez $a2, .LBB0_200 # %bb.201: # %vec.epilog.middle.block933 bne $a1, $t8, .LBB0_95 b .LBB0_97 @@ -1421,8 +1413,7 @@ attenuate_fluxes: # @attenuate_fluxes addi.d $s2, $t0, 32 addi.d $s3, $t5, 32 addi.d $s4, $t7, 32 - lu12i.w $a5, -262144 - xvreplgr2vr.w $xr0, $a5 + xvldi $xr0, -3136 move $s5, $a2 .p2align 4, , 16 .LBB0_203: # %vector.body618 @@ -1460,22 +1451,21 @@ attenuate_fluxes: # @attenuate_fluxes andi $a5, $t8, 12 beqz $a5, .LBB0_37 .LBB0_206: # %vec.epilog.ph - move $a5, $a2 + move $s4, $a2 bstrpick.d $a2, $t8, 30, 2 slli.d $a2, $a2, 2 - sub.d $t1, $a5, $a2 - alsl.d $s1, $a5, $t2, 2 - alsl.d $s2, $a5, $t5, 2 - alsl.d $s3, $a5, $t7, 2 - alsl.d $s4, $a5, $t0, 2 - alsl.d $a5, $a5, $t4, 2 - lu12i.w $s5, -262144 - vreplgr2vr.w $vr0, $s5 + sub.d $t1, $s4, $a2 + alsl.d $s1, $s4, $t2, 2 + alsl.d $s2, $s4, $t5, 2 + alsl.d $s3, $s4, $t7, 2 + alsl.d $a5, $s4, $t0, 2 + alsl.d $s4, $s4, $t4, 2 + vldi $vr0, -3136 .p2align 4, , 
16 .LBB0_207: # %vec.epilog.vector.body # =>This Inner Loop Header: Depth=1 - vld $vr3, $a5, 0 - vld $vr4, $s4, 0 + vld $vr3, $s4, 0 + vld $vr4, $a5, 0 vld $vr6, $s3, 0 vld $vr7, $s2, 0 vfadd.s $vr8, $vr3, $vr0 @@ -1488,8 +1478,8 @@ attenuate_fluxes: # @attenuate_fluxes addi.d $s1, $s1, 16 addi.d $s2, $s2, 16 addi.d $s3, $s3, 16 - addi.d $s4, $s4, 16 addi.d $a5, $a5, 16 + addi.d $s4, $s4, 16 bnez $t1, .LBB0_207 # %bb.208: # %vec.epilog.middle.block bne $a2, $t8, .LBB0_37 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s index 8bdff0b1..c9c30476 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s @@ -715,8 +715,7 @@ generate_polar_angles: # @generate_polar_angles pcalau12i $a2, %pc_hi20(.LCPI6_0) xvld $xr1, $a2, %pc_lo12(.LCPI6_0) xvreplve0.d $xr2, $xr0 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 + xvldi $xr3, -928 lu12i.w $a2, 345154 ori $a2, $a2, 3352 lu32i.d $a2, -450053 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/comm.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/comm.s index 5fdcdfb8..cf7144ac 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/comm.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/comm.s @@ -1502,8 +1502,7 @@ on_proc_comm_diff: # @on_proc_comm_diff st.d $a0, $sp, 72 # 8-byte Folded Spill slli.d $a0, $a4, 3 st.d $a0, $sp, 64 # 8-byte Folded Spill - lu52i.d $a0, $zero, 1021 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -944 st.d $s2, $sp, 144 # 8-byte Folded Spill b .LBB2_18 .p2align 4, , 16 @@ -2212,8 +2211,7 @@ on_proc_comm_diff: # @on_proc_comm_diff vldi $vr0, -944 slli.d $a0, $a2, 3 st.d $a0, $sp, 56 # 8-byte Folded Spill - lu52i.d $a0, $zero, 1021 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -944 st.d $a4, $sp, 120 # 8-byte Folded Spill b .LBB2_85 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s index bda69a28..016a317c 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/stencil.s @@ -27,7 +27,7 @@ stencil_calc: # @stencil_calc pcalau12i $a1, %pc_hi20(x_block_size) ld.w $a1, $a1, %pc_lo12(x_block_size) st.d $a0, $fp, -384 # 8-byte Folded Spill - st.d $a1, $fp, -296 # 8-byte Folded Spill + st.d $a1, $fp, -264 # 8-byte Folded Spill addi.d $a0, $a1, 2 pcalau12i $a1, %pc_hi20(y_block_size) ld.wu $a1, $a1, %pc_lo12(y_block_size) @@ -71,7 +71,7 @@ stencil_calc: # @stencil_calc pcalau12i $a4, %pc_hi20(blocks) ld.d $a4, $a4, %pc_lo12(blocks) st.d $a4, $fp, -224 # 8-byte Folded Spill - ld.d $t0, $fp, -296 # 8-byte Folded Reload + ld.d $t0, $fp, -264 # 8-byte Folded Reload slti $a4, $t0, 1 ld.d $a7, $fp, -360 # 8-byte Folded Reload slti $a5, $a7, 1 @@ -106,11 +106,8 @@ stencil_calc: # @stencil_calc slli.d $a1, $a1, 3 st.d $a1, $fp, -176 # 8-byte Folded Spill slli.d $s6, $a0, 3 - ori $a0, $zero, 0 - lu32i.d $a0, -262144 - lu52i.d $a0, $a0, 1025 - xvreplgr2vr.d $xr0, $a0 - xvst $xr0, $fp, -288 # 32-byte Folded Spill + xvldi $xr0, -996 + xvst $xr0, 
$fp, -304 # 32-byte Folded Spill st.d $a2, $fp, -152 # 8-byte Folded Spill b .LBB0_4 .p2align 4, , 16 @@ -137,7 +134,7 @@ stencil_calc: # @stencil_calc ldx.w $a1, $a1, $a0 bltz $a1, .LBB0_3 # %bb.5: # in Loop: Header=BB0_4 Depth=1 - ld.d $a1, $fp, -296 # 8-byte Folded Reload + ld.d $a1, $fp, -264 # 8-byte Folded Reload blez $a1, .LBB0_3 # %bb.6: # in Loop: Header=BB0_4 Depth=1 ld.d $a1, $fp, -360 # 8-byte Folded Reload @@ -159,8 +156,8 @@ stencil_calc: # @stencil_calc ori $a3, $zero, 1 ld.d $t1, $fp, -256 # 8-byte Folded Reload ld.d $a4, $fp, -240 # 8-byte Folded Reload - xvld $xr5, $fp, -288 # 32-byte Folded Reload - vldi $vr6, -996 + vldi $vr5, -996 + xvld $xr6, $fp, -304 # 32-byte Folded Reload b .LBB0_10 .p2align 4, , 16 .LBB0_9: # %._crit_edge294.split @@ -247,7 +244,7 @@ stencil_calc: # @stencil_calc xvfadd.d $xr1, $xr1, $xr0 xvfadd.d $xr1, $xr1, $xr3 xvfadd.d $xr1, $xr1, $xr4 - xvfdiv.d $xr1, $xr1, $xr5 + xvfdiv.d $xr1, $xr1, $xr6 xvst $xr1, $t2, 0 addi.d $s2, $s2, 32 addi.d $ra, $ra, 32 @@ -292,7 +289,7 @@ stencil_calc: # @stencil_calc fadd.d $fa1, $fa1, $fa0 fadd.d $fa1, $fa1, $fa2 fadd.d $fa1, $fa1, $fa3 - fdiv.d $fa1, $fa1, $fa6 + fdiv.d $fa1, $fa1, $fa5 fst.d $fa1, $a0, 0 addi.d $a0, $a0, 8 addi.d $a1, $a1, 8 @@ -355,7 +352,7 @@ stencil_calc: # @stencil_calc addi.d $a1, $a1, 1 ld.d $a0, $fp, -176 # 8-byte Folded Reload add.d $s0, $s0, $a0 - ld.d $a0, $fp, -296 # 8-byte Folded Reload + ld.d $a0, $fp, -264 # 8-byte Folded Reload bne $a1, $a0, .LBB0_21 b .LBB0_3 .LBB0_24: # %.preheader264 @@ -368,7 +365,7 @@ stencil_calc: # @stencil_calc pcalau12i $a4, %pc_hi20(blocks) ld.d $a4, $a4, %pc_lo12(blocks) st.d $a4, $fp, -376 # 8-byte Folded Spill - ld.d $t0, $fp, -296 # 8-byte Folded Reload + ld.d $t0, $fp, -264 # 8-byte Folded Reload slti $a4, $t0, 1 ld.d $a7, $fp, -360 # 8-byte Folded Reload slti $a5, $a7, 1 @@ -381,14 +378,14 @@ stencil_calc: # @stencil_calc bstrpick.d $a3, $t0, 31, 0 st.d $a3, $fp, -328 # 8-byte Folded Spill bstrpick.d $t0, $a7, 31, 0 - st.d $t1, $fp, -176 # 8-byte Folded Spill + st.d $t1, $fp, -168 # 8-byte Folded Spill bstrpick.d $t1, $t1, 31, 0 slli.d $a3, $a1, 3 addi.d $a3, $a3, 8 mul.d $a3, $a3, $a0 mul.d $a7, $a0, $a1 slli.d $t4, $a7, 3 - st.d $t0, $fp, -184 # 8-byte Folded Spill + st.d $t0, $fp, -176 # 8-byte Folded Spill add.d $a1, $a1, $t0 slli.d $a1, $a1, 3 addi.d $a1, $a1, -8 @@ -401,25 +398,22 @@ stencil_calc: # @stencil_calc addi.d $a3, $a3, 8 st.d $a3, $fp, -304 # 8-byte Folded Spill add.d $a1, $t3, $a1 - st.d $t1, $fp, -192 # 8-byte Folded Spill + st.d $t1, $fp, -184 # 8-byte Folded Spill alsl.d $a1, $t1, $a1, 3 st.d $a1, $fp, -336 # 8-byte Folded Spill ori $a1, $zero, 1 move $a3, $a7 bstrins.d $a3, $a1, 1, 0 - st.d $a3, $fp, -224 # 8-byte Folded Spill - st.d $a7, $fp, -216 # 8-byte Folded Spill - bstrins.d $a7, $zero, 1, 0 + st.d $a3, $fp, -216 # 8-byte Folded Spill st.d $a7, $fp, -208 # 8-byte Folded Spill + bstrins.d $a7, $zero, 1, 0 + st.d $a7, $fp, -200 # 8-byte Folded Spill or $a1, $a4, $a5 or $a1, $a1, $a6 st.d $a1, $fp, -424 # 8-byte Folded Spill slli.d $s8, $a0, 3 - ori $a0, $zero, 0 - lu32i.d $a0, -327680 - lu52i.d $s1, $a0, 1027 - st.d $a2, $fp, -200 # 8-byte Folded Spill - st.d $t4, $fp, -288 # 8-byte Folded Spill + st.d $a2, $fp, -192 # 8-byte Folded Spill + st.d $t4, $fp, -256 # 8-byte Folded Spill b .LBB0_27 .p2align 4, , 16 .LBB0_26: # %.loopexit262 @@ -445,7 +439,7 @@ stencil_calc: # @stencil_calc ldx.w $a1, $a1, $a0 bltz $a1, .LBB0_26 # %bb.28: # in Loop: Header=BB0_27 Depth=1 - ld.d $a1, $fp, -296 # 8-byte Folded Reload + ld.d $a1, 
$fp, -264 # 8-byte Folded Reload blez $a1, .LBB0_26 # %bb.29: # in Loop: Header=BB0_27 Depth=1 ld.d $a1, $fp, -360 # 8-byte Folded Reload @@ -466,22 +460,22 @@ stencil_calc: # @stencil_calc st.d $a0, $fp, -312 # 8-byte Folded Spill move $a1, $zero ori $a2, $zero, 1 - ld.d $t0, $fp, -416 # 8-byte Folded Reload + ld.d $a7, $fp, -416 # 8-byte Folded Reload ld.d $t7, $fp, -304 # 8-byte Folded Reload vldi $vr29, -965 b .LBB0_33 .p2align 4, , 16 .LBB0_32: # %._crit_edge270.split # in Loop: Header=BB0_33 Depth=2 - ld.d $a2, $fp, -240 # 8-byte Folded Reload + ld.d $a2, $fp, -232 # 8-byte Folded Reload addi.d $a2, $a2, 1 - ld.d $a1, $fp, -232 # 8-byte Folded Reload + ld.d $a1, $fp, -224 # 8-byte Folded Reload addi.d $a1, $a1, 1 - ld.d $t4, $fp, -288 # 8-byte Folded Reload - ld.d $t7, $fp, -256 # 8-byte Folded Reload + ld.d $t4, $fp, -256 # 8-byte Folded Reload + ld.d $t7, $fp, -248 # 8-byte Folded Reload add.d $t7, $t7, $t4 - ld.d $t0, $fp, -248 # 8-byte Folded Reload - add.d $t0, $t0, $t4 + ld.d $a7, $fp, -240 # 8-byte Folded Reload + add.d $a7, $a7, $t4 ld.d $a0, $fp, -328 # 8-byte Folded Reload beq $a2, $a0, .LBB0_52 .LBB0_33: # %.preheader260 @@ -490,24 +484,23 @@ stencil_calc: # @stencil_calc # Child Loop BB0_35 Depth 3 # Child Loop BB0_49 Depth 4 # Child Loop BB0_38 Depth 4 - st.d $a1, $fp, -232 # 8-byte Folded Spill + st.d $a1, $fp, -224 # 8-byte Folded Spill mul.d $a0, $t4, $a1 ld.d $a3, $fp, -312 # 8-byte Folded Reload alsl.d $a1, $a2, $a3, 3 ld.d $a4, $a1, -8 - st.d $a2, $fp, -240 # 8-byte Folded Spill + st.d $a2, $fp, -232 # 8-byte Folded Spill slli.d $a2, $a2, 3 ldx.d $a2, $a3, $a2 - ld.d $a1, $a1, 8 + ld.d $t1, $a1, 8 ld.d $s2, $a4, 0 st.d $a4, $fp, -152 # 8-byte Folded Spill ld.d $t8, $a4, 8 ld.d $ra, $a2, 0 st.d $a2, $fp, -160 # 8-byte Folded Spill ld.d $t2, $a2, 8 - ld.d $a3, $a1, 0 - st.d $a1, $fp, -168 # 8-byte Folded Spill - ld.d $t3, $a1, 8 + ld.d $a3, $t1, 0 + ld.d $t3, $t1, 8 fld.d $fa6, $t8, 0 fld.d $ft2, $t8, 8 fld.d $fa7, $t2, 0 @@ -518,15 +511,15 @@ stencil_calc: # @stencil_calc add.d $t4, $a1, $a0 ld.d $a1, $fp, -336 # 8-byte Folded Reload add.d $t5, $a1, $a0 - st.d $t0, $fp, -248 # 8-byte Folded Spill - st.d $t7, $fp, -256 # 8-byte Folded Spill + st.d $a7, $fp, -240 # 8-byte Folded Spill + st.d $t7, $fp, -248 # 8-byte Folded Spill ori $a0, $zero, 1 b .LBB0_35 .p2align 4, , 16 .LBB0_34: # %._crit_edge # in Loop: Header=BB0_35 Depth=3 add.d $t7, $t7, $s8 - add.d $t0, $t0, $s8 + add.d $a7, $a7, $s8 fmov.d $ft1, $fa5 fmov.d $ft0, $fa4 fmov.d $ft3, $fa3 @@ -536,7 +529,7 @@ stencil_calc: # @stencil_calc move $a3, $a5 move $ra, $a2 move $s2, $a1 - ld.d $a1, $fp, -184 # 8-byte Folded Reload + ld.d $a1, $fp, -176 # 8-byte Folded Reload beq $a0, $a1, .LBB0_32 .LBB0_35: # %.preheader258 # Parent Loop BB0_27 Depth=1 @@ -553,16 +546,15 @@ stencil_calc: # @stencil_calc move $a5, $t3 ld.d $a6, $fp, -160 # 8-byte Folded Reload ldx.d $t2, $a6, $a4 - ld.d $a6, $fp, -168 # 8-byte Folded Reload - ldx.d $t3, $a6, $a4 + ldx.d $t3, $t1, $a4 fld.d $fa0, $t8, 0 fld.d $fa1, $t8, 8 fld.d $fa2, $t2, 0 fld.d $fa3, $t2, 8 fld.d $fa4, $t3, 0 fld.d $fa5, $t3, 8 - ori $a7, $zero, 1 - ld.d $a4, $fp, -176 # 8-byte Folded Reload + ori $s0, $zero, 1 + ld.d $a4, $fp, -168 # 8-byte Folded Reload ori $a6, $zero, 5 bgeu $a4, $a6, .LBB0_39 .LBB0_36: # in Loop: Header=BB0_35 Depth=3 @@ -574,32 +566,32 @@ stencil_calc: # @stencil_calc fmov.d $ft4, $fa0 .LBB0_37: # %scalar.ph.preheader # in Loop: Header=BB0_35 Depth=3 - slli.d $a4, $a7, 3 + slli.d $a4, $s0, 3 addi.d $a6, $a1, 8 - addi.d $s0, $t8, 8 + addi.d $s1, 
$t8, 8 addi.d $s3, $a2, 8 addi.d $s5, $t2, 8 - ld.d $t1, $fp, -192 # 8-byte Folded Reload - sub.d $a7, $t1, $a7 + ld.d $t0, $fp, -184 # 8-byte Folded Reload + sub.d $s0, $t0, $s0 addi.d $s6, $t3, 8 addi.d $s7, $a5, 8 addi.d $a3, $a3, 8 - move $t6, $t0 + move $t6, $a7 .p2align 4, , 16 .LBB0_38: # %scalar.ph # Parent Loop BB0_27 Depth=1 # Parent Loop BB0_33 Depth=2 # Parent Loop BB0_35 Depth=3 # => This Inner Loop Header: Depth=4 - add.d $t1, $s2, $a4 - fld.d $ft10, $t1, -8 + add.d $t0, $s2, $a4 + fld.d $ft10, $t0, -8 fldx.d $ft11, $s2, $a4 fadd.d $ft10, $ft10, $ft11 - fld.d $ft11, $t1, 8 - add.d $t1, $ra, $a4 - fld.d $ft12, $t1, -8 + fld.d $ft11, $t0, 8 + add.d $t0, $ra, $a4 + fld.d $ft12, $t0, -8 fldx.d $ft13, $ra, $a4 - fld.d $ft14, $t1, 8 + fld.d $ft14, $t0, 8 fadd.d $ft10, $ft10, $ft11 fadd.d $ft10, $ft10, $fa6 fmov.d $fa6, $ft2 @@ -607,9 +599,9 @@ stencil_calc: # @stencil_calc fadd.d $ft2, $ft2, $ft14 fadd.d $ft11, $ft2, $fa7 fmov.d $fa7, $ft3 - add.d $t1, $a3, $a4 - fld.d $ft3, $t1, -16 - fld.d $ft12, $t1, -8 + add.d $t0, $a3, $a4 + fld.d $ft3, $t0, -16 + fld.d $ft12, $t0, -8 fldx.d $ft13, $a3, $a4 fadd.d $ft10, $ft10, $fa6 fldx.d $ft2, $a6, $a4 @@ -631,7 +623,7 @@ stencil_calc: # @stencil_calc fadd.d $ft0, $ft0, $ft6 fmov.d $ft6, $ft7 fadd.d $ft13, $ft4, $ft10 - fldx.d $ft8, $s0, $a4 + fldx.d $ft8, $s1, $a4 fadd.d $ft14, $ft5, $ft9 fldx.d $ft9, $s5, $a4 fldx.d $ft7, $s6, $a4 @@ -650,17 +642,17 @@ stencil_calc: # @stencil_calc fdiv.d $ft10, $ft10, $fs5 fstx.d $ft10, $t6, $a4 addi.d $a6, $a6, 8 - addi.d $s0, $s0, 8 + addi.d $s1, $s1, 8 addi.d $s3, $s3, 8 addi.d $t6, $t6, 8 addi.d $s5, $s5, 8 addi.d $ra, $ra, 8 addi.d $s2, $s2, 8 - addi.d $a7, $a7, -1 + addi.d $s0, $s0, -1 addi.d $s6, $s6, 8 addi.d $s7, $s7, 8 addi.d $a3, $a3, 8 - bnez $a7, .LBB0_38 + bnez $s0, .LBB0_38 b .LBB0_34 .p2align 4, , 16 .LBB0_39: # %vector.memcheck @@ -683,58 +675,58 @@ stencil_calc: # @stencil_calc addi.d $a4, $t8, 16 add.d $t6, $t8, $s4 sltu $t6, $t4, $t6 - sltu $s0, $a4, $t5 - and $t6, $t6, $s0 + sltu $s1, $a4, $t5 + and $t6, $t6, $s1 bnez $t6, .LBB0_36 # %bb.42: # %vector.memcheck # in Loop: Header=BB0_35 Depth=3 - addi.d $s5, $a2, 16 + addi.d $s1, $a2, 16 add.d $t6, $a2, $s4 sltu $t6, $t4, $t6 - sltu $s0, $s5, $t5 - and $t6, $t6, $s0 + sltu $s3, $s1, $t5 + and $t6, $t6, $s3 bnez $t6, .LBB0_36 # %bb.43: # %vector.memcheck # in Loop: Header=BB0_35 Depth=3 add.d $t6, $ra, $s4 sltu $t6, $t4, $t6 - sltu $s0, $ra, $t5 - and $t6, $t6, $s0 + sltu $s3, $ra, $t5 + and $t6, $t6, $s3 bnez $t6, .LBB0_36 # %bb.44: # %vector.memcheck # in Loop: Header=BB0_35 Depth=3 - addi.d $s3, $t2, 16 + addi.d $s5, $t2, 16 add.d $t6, $t2, $s4 sltu $t6, $t4, $t6 - sltu $s0, $s3, $t5 - and $t6, $t6, $s0 + sltu $s3, $s5, $t5 + and $t6, $t6, $s3 bnez $t6, .LBB0_36 # %bb.45: # %vector.memcheck # in Loop: Header=BB0_35 Depth=3 - addi.d $s7, $a5, 16 + addi.d $s3, $a5, 16 add.d $t6, $a5, $s4 sltu $t6, $t4, $t6 - sltu $s0, $s7, $t5 - and $t6, $t6, $s0 + sltu $s6, $s3, $t5 + and $t6, $t6, $s6 bnez $t6, .LBB0_36 # %bb.46: # %vector.memcheck # in Loop: Header=BB0_35 Depth=3 add.d $t6, $a3, $s4 sltu $t6, $t4, $t6 - sltu $s0, $a3, $t5 - and $t6, $t6, $s0 + sltu $s6, $a3, $t5 + and $t6, $t6, $s6 bnez $t6, .LBB0_36 # %bb.47: # %vector.memcheck # in Loop: Header=BB0_35 Depth=3 - addi.d $s6, $t3, 16 + addi.d $s7, $t3, 16 add.d $t6, $t3, $s4 sltu $t6, $t4, $t6 - sltu $s0, $s6, $t5 - and $t6, $t6, $s0 + sltu $s6, $s7, $t5 + and $t6, $t6, $s6 bnez $t6, .LBB0_36 # %bb.48: # %vector.ph # in Loop: Header=BB0_35 Depth=3 - move $s0, $zero + move $s6, $zero 
xvinsve0.d $xr24, $xr5, 3 xvinsve0.d $xr15, $xr4, 3 xvinsve0.d $xr23, $xr9, 3 @@ -747,7 +739,7 @@ stencil_calc: # @stencil_calc xvinsve0.d $xr18, $xr0, 3 xvinsve0.d $xr10, $xr10, 3 xvinsve0.d $xr9, $xr6, 3 - ld.d $a7, $fp, -208 # 8-byte Folded Reload + ld.d $s0, $fp, -200 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_49: # %vector.body # Parent Loop BB0_27 Depth=1 @@ -755,11 +747,11 @@ stencil_calc: # @stencil_calc # Parent Loop BB0_35 Depth=3 # => This Inner Loop Header: Depth=4 xvori.b $xr6, $xr10, 0 - add.d $t6, $s2, $s0 - xvldx $xr7, $s2, $s0 + add.d $t6, $s2, $s6 + xvldx $xr7, $s2, $s6 xvld $xr12, $t6, 8 xvld $xr13, $t6, 16 - xvldx $xr10, $a6, $s0 + xvldx $xr10, $a6, $s6 xvpermi.d $xr6, $xr6, 78 xvori.b $xr8, $xr23, 0 xvori.b $xr14, $xr24, 0 @@ -781,7 +773,7 @@ stencil_calc: # @stencil_calc xvpickve.d $xr12, $xr9, 2 xvinsve0.d $xr13, $xr12, 3 xvpermi.d $xr12, $xr16, 78 - xvldx $xr16, $a4, $s0 + xvldx $xr16, $a4, $s6 xvfadd.d $xr7, $xr7, $xr13 xvfadd.d $xr7, $xr7, $xr9 xvfadd.d $xr7, $xr7, $xr10 @@ -802,12 +794,12 @@ stencil_calc: # @stencil_calc xvinsve0.d $xr21, $xr13, 3 xvfadd.d $xr7, $xr7, $xr21 xvfadd.d $xr7, $xr7, $xr18 - add.d $t6, $ra, $s0 - xvldx $xr13, $ra, $s0 + add.d $t6, $ra, $s6 + xvldx $xr13, $ra, $s6 xvld $xr23, $t6, 8 xvld $xr24, $t6, 16 xvpermi.d $xr25, $xr11, 78 - xvldx $xr11, $s5, $s0 + xvldx $xr11, $s1, $s6 xvfadd.d $xr21, $xr7, $xr16 xvfadd.d $xr7, $xr13, $xr23 xvfadd.d $xr13, $xr7, $xr24 @@ -827,7 +819,7 @@ stencil_calc: # @stencil_calc xvpickve.d $xr24, $xr22, 2 xvinsve0.d $xr23, $xr24, 3 xvpermi.d $xr24, $xr20, 78 - xvldx $xr20, $s3, $s0 + xvldx $xr20, $s5, $s6 xvfadd.d $xr13, $xr13, $xr23 xvfadd.d $xr13, $xr13, $xr22 xvfadd.d $xr23, $xr13, $xr11 @@ -848,12 +840,12 @@ stencil_calc: # @stencil_calc xvinsve0.d $xr25, $xr24, 3 xvfadd.d $xr23, $xr23, $xr25 xvfadd.d $xr24, $xr23, $xr19 - add.d $t6, $a3, $s0 - xvldx $xr26, $a3, $s0 + add.d $t6, $a3, $s6 + xvldx $xr26, $a3, $s6 xvld $xr27, $t6, 8 xvld $xr28, $t6, 16 xvpermi.d $xr8, $xr8, 78 - xvldx $xr23, $s7, $s0 + xvldx $xr23, $s3, $s6 xvfadd.d $xr25, $xr24, $xr20 xvfadd.d $xr24, $xr26, $xr27 xvfadd.d $xr26, $xr24, $xr28 @@ -873,7 +865,7 @@ stencil_calc: # @stencil_calc xvpickve.d $xr24, $xr17, 2 xvinsve0.d $xr27, $xr24, 3 xvpermi.d $xr14, $xr14, 78 - xvldx $xr24, $s6, $s0 + xvldx $xr24, $s7, $s6 xvfadd.d $xr26, $xr26, $xr27 xvfadd.d $xr26, $xr26, $xr17 xvfadd.d $xr26, $xr26, $xr23 @@ -897,17 +889,17 @@ stencil_calc: # @stencil_calc xvfadd.d $xr15, $xr15, $xr24 xvfadd.d $xr21, $xr21, $xr25 xvfadd.d $xr15, $xr21, $xr15 - xvreplgr2vr.d $xr21, $s1 + xvldi $xr21, -965 xvfdiv.d $xr15, $xr15, $xr21 - xvstx $xr15, $t7, $s0 - addi.d $a7, $a7, -4 - addi.d $s0, $s0, 32 + xvstx $xr15, $t7, $s6 + addi.d $s0, $s0, -4 + addi.d $s6, $s6, 32 xvori.b $xr15, $xr28, 0 - bnez $a7, .LBB0_49 + bnez $s0, .LBB0_49 # %bb.50: # %middle.block # in Loop: Header=BB0_35 Depth=3 - ld.d $a4, $fp, -216 # 8-byte Folded Reload - ld.d $a6, $fp, -208 # 8-byte Folded Reload + ld.d $a4, $fp, -208 # 8-byte Folded Reload + ld.d $a6, $fp, -200 # 8-byte Folded Reload beq $a4, $a6, .LBB0_34 # %bb.51: # in Loop: Header=BB0_35 Depth=3 xvpickve.d $xr15, $xr24, 3 @@ -916,12 +908,12 @@ stencil_calc: # @stencil_calc xvpickve.d $xr11, $xr11, 3 xvpickve.d $xr16, $xr16, 3 xvpickve.d $xr10, $xr10, 3 - ld.d $a7, $fp, -224 # 8-byte Folded Reload + ld.d $s0, $fp, -216 # 8-byte Folded Reload b .LBB0_37 .p2align 4, , 16 .LBB0_52: # %.preheader261 # in Loop: Header=BB0_27 Depth=1 - ld.d $a2, $fp, -200 # 8-byte Folded Reload + ld.d $a2, $fp, -192 # 8-byte Folded Reload ld.d 
$a0, $fp, -424 # 8-byte Folded Reload bnez $a0, .LBB0_26 # %bb.53: # %.preheader259.lr.ph.split.split @@ -929,45 +921,43 @@ stencil_calc: # @stencil_calc ld.d $a0, $fp, -400 # 8-byte Folded Reload ld.d $a0, $a0, 184 ld.d $a1, $fp, -408 # 8-byte Folded Reload - ldx.d $a0, $a0, $a1 - st.d $a0, $fp, -152 # 8-byte Folded Spill - move $s3, $zero - ori $s5, $zero, 1 - ld.d $s6, $fp, -304 # 8-byte Folded Reload + ldx.d $s0, $a0, $a1 + move $s1, $zero + ori $s3, $zero, 1 + ld.d $s5, $fp, -304 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_54: # %.preheader259 # Parent Loop BB0_27 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_55 Depth 3 - slli.d $a0, $s5, 3 - ld.d $a1, $fp, -152 # 8-byte Folded Reload - ldx.d $a0, $a1, $a0 - addi.d $s7, $a0, 8 - ld.d $s0, $fp, -320 # 8-byte Folded Reload - move $s2, $s6 + slli.d $a0, $s3, 3 + ldx.d $a0, $s0, $a0 + addi.d $s6, $a0, 8 + ld.d $s7, $fp, -320 # 8-byte Folded Reload + move $s2, $s5 .p2align 4, , 16 .LBB0_55: # %.preheader257 # Parent Loop BB0_27 Depth=1 # Parent Loop BB0_54 Depth=2 # => This Inner Loop Header: Depth=3 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 addi.d $a0, $a0, 8 move $a1, $s2 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ld.d $a2, $fp, -200 # 8-byte Folded Reload - addi.d $s7, $s7, 8 - addi.d $s0, $s0, -1 + ld.d $a2, $fp, -192 # 8-byte Folded Reload + addi.d $s6, $s6, 8 + addi.d $s7, $s7, -1 add.d $s2, $s2, $s8 - bnez $s0, .LBB0_55 + bnez $s7, .LBB0_55 # %bb.56: # %._crit_edge281.split # in Loop: Header=BB0_54 Depth=2 - addi.d $s5, $s5, 1 addi.d $s3, $s3, 1 - ld.d $t4, $fp, -288 # 8-byte Folded Reload - add.d $s6, $s6, $t4 - ld.d $a0, $fp, -296 # 8-byte Folded Reload - bne $s3, $a0, .LBB0_54 + addi.d $s1, $s1, 1 + ld.d $t4, $fp, -256 # 8-byte Folded Reload + add.d $s5, $s5, $t4 + ld.d $a0, $fp, -264 # 8-byte Folded Reload + bne $s1, $a0, .LBB0_54 b .LBB0_26 .LBB0_57: # %.loopexit256 addi.d $sp, $fp, -432 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s index 4ef5c93f..e9814512 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s @@ -381,8 +381,7 @@ create_domain: # @create_domain addi.d $a2, $s4, 1696 bstrpick.d $a1, $a4, 30, 2 slli.d $a1, $a1, 2 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 move $a3, $a1 .p2align 4, , 16 .LBB4_9: # %vector.body diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s index a3826ba1..55d5bdeb 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s @@ -3645,8 +3645,6 @@ restriction_betas: # @restriction_betas lu32i.d $a0, 1 st.d $a0, $sp, 208 # 8-byte Folded Spill vreplgr2vr.d $vr1, $a0 - lu52i.d $a0, $zero, 1021 - vreplgr2vr.d $vr2, $a0 b .LBB8_3 .p2align 4, , 16 .LBB8_2: # %._crit_edge224 @@ -3751,11 +3749,11 @@ restriction_betas: # @restriction_betas bstrpick.d $a0, $t1, 30, 1 slli.d $a0, $a0, 1 st.d $a0, $sp, 216 # 8-byte Folded Spill - vinsgr2vr.w $vr3, $t5, 0 - vinsgr2vr.w $vr3, $t5, 1 - vinsgr2vr.w $vr4, $t4, 0 - vinsgr2vr.w $vr4, $t4, 1 - vadd.w $vr5, $vr3, $vr4 + vinsgr2vr.w $vr2, $t5, 0 + vinsgr2vr.w 
$vr2, $t5, 1 + vinsgr2vr.w $vr3, $t4, 0 + vinsgr2vr.w $vr3, $t4, 1 + vadd.w $vr4, $vr2, $vr3 b .LBB8_8 .p2align 4, , 16 .LBB8_7: # %._crit_edge203.split.us.us.us @@ -3811,22 +3809,22 @@ restriction_betas: # @restriction_betas # Parent Loop BB8_9 Depth=3 # => This Inner Loop Header: Depth=4 slli.d $t0, $a6, 3 - fldx.d $fa6, $s1, $t0 + fldx.d $fa5, $s1, $t0 add.w $t0, $t5, $a6 slli.d $t0, $t0, 3 - fldx.d $fa7, $s1, $t0 + fldx.d $fa6, $s1, $t0 add.w $t0, $t4, $a6 slli.d $t0, $t0, 3 - fldx.d $ft0, $s1, $t0 + fldx.d $fa7, $s1, $t0 add.w $t0, $a4, $a6 slli.d $t0, $t0, 3 - fldx.d $ft1, $s1, $t0 - fadd.d $fa6, $fa6, $fa7 - fadd.d $fa6, $fa6, $ft0 - fadd.d $fa6, $fa6, $ft1 - fmul.d $fa6, $fa6, $fa0 + fldx.d $ft0, $s1, $t0 + fadd.d $fa5, $fa5, $fa6 + fadd.d $fa5, $fa5, $fa7 + fadd.d $fa5, $fa5, $ft0 + fmul.d $fa5, $fa5, $fa0 slli.d $t0, $a1, 3 - fstx.d $fa6, $fp, $t0 + fstx.d $fa5, $fp, $t0 addi.w $a6, $a6, 2 addi.d $s3, $s3, -1 addi.w $a1, $a1, 1 @@ -3914,41 +3912,29 @@ restriction_betas: # @restriction_betas mul.d $a1, $a0, $t5 ld.d $a6, $sp, 200 # 8-byte Folded Reload add.d $a1, $a1, $a6 - vinsgr2vr.w $vr6, $a1, 0 - vinsgr2vr.w $vr6, $a1, 1 + vinsgr2vr.w $vr5, $a1, 0 + vinsgr2vr.w $vr5, $a1, 1 move $s3, $s8 ld.d $a6, $sp, 216 # 8-byte Folded Reload - vori.b $vr7, $vr1, 0 + vori.b $vr6, $vr1, 0 .p2align 4, , 16 .LBB8_25: # %vector.body454 # Parent Loop BB8_3 Depth=1 # Parent Loop BB8_8 Depth=2 # Parent Loop BB8_9 Depth=3 # => This Inner Loop Header: Depth=4 - vadd.w $vr8, $vr6, $vr7 - vslli.w $vr8, $vr8, 1 - vshuf4i.w $vr9, $vr8, 16 - vslli.d $vr9, $vr9, 32 - vsrai.d $vr9, $vr9, 32 - vpickve2gr.d $a1, $vr9, 0 - slli.d $a1, $a1, 3 - vpickve2gr.d $t0, $vr9, 1 - slli.d $t0, $t0, 3 - fldx.d $ft1, $s1, $a1 - fldx.d $ft2, $s1, $t0 - vadd.w $vr11, $vr8, $vr3 - vshuf4i.w $vr11, $vr11, 16 - vslli.d $vr11, $vr11, 32 - vsrai.d $vr11, $vr11, 32 - vpickve2gr.d $a1, $vr11, 0 + vadd.w $vr7, $vr5, $vr6 + vslli.w $vr7, $vr7, 1 + vshuf4i.w $vr8, $vr7, 16 + vslli.d $vr8, $vr8, 32 + vsrai.d $vr8, $vr8, 32 + vpickve2gr.d $a1, $vr8, 0 slli.d $a1, $a1, 3 - vpickve2gr.d $t0, $vr11, 1 + vpickve2gr.d $t0, $vr8, 1 slli.d $t0, $t0, 3 - fldx.d $ft3, $s1, $a1 - fldx.d $ft4, $s1, $t0 - vextrins.d $vr9, $vr10, 16 - vextrins.d $vr11, $vr12, 16 - vadd.w $vr10, $vr8, $vr4 + fldx.d $ft0, $s1, $a1 + fldx.d $ft1, $s1, $t0 + vadd.w $vr10, $vr7, $vr2 vshuf4i.w $vr10, $vr10, 16 vslli.d $vr10, $vr10, 32 vsrai.d $vr10, $vr10, 32 @@ -3957,26 +3943,38 @@ restriction_betas: # @restriction_betas vpickve2gr.d $t0, $vr10, 1 slli.d $t0, $t0, 3 fldx.d $ft2, $s1, $a1 - fldx.d $ft4, $s1, $t0 - vadd.w $vr8, $vr8, $vr5 - vshuf4i.w $vr8, $vr8, 16 - vslli.d $vr8, $vr8, 32 - vsrai.d $vr8, $vr8, 32 - vpickve2gr.d $a1, $vr8, 0 + fldx.d $ft3, $s1, $t0 + vextrins.d $vr8, $vr9, 16 + vextrins.d $vr10, $vr11, 16 + vadd.w $vr9, $vr7, $vr3 + vshuf4i.w $vr9, $vr9, 16 + vslli.d $vr9, $vr9, 32 + vsrai.d $vr9, $vr9, 32 + vpickve2gr.d $a1, $vr9, 0 slli.d $a1, $a1, 3 - vpickve2gr.d $t0, $vr8, 1 + vpickve2gr.d $t0, $vr9, 1 slli.d $t0, $t0, 3 - fldx.d $ft0, $s1, $a1 - fldx.d $ft5, $s1, $t0 - vfadd.d $vr9, $vr9, $vr11 - vextrins.d $vr10, $vr12, 16 - vfadd.d $vr9, $vr9, $vr10 - vextrins.d $vr8, $vr13, 16 - vfadd.d $vr8, $vr9, $vr8 - vfmul.d $vr8, $vr8, $vr2 + fldx.d $ft1, $s1, $a1 + fldx.d $ft3, $s1, $t0 + vadd.w $vr7, $vr7, $vr4 + vshuf4i.w $vr7, $vr7, 16 + vslli.d $vr7, $vr7, 32 + vsrai.d $vr7, $vr7, 32 + vpickve2gr.d $a1, $vr7, 0 + slli.d $a1, $a1, 3 + vpickve2gr.d $t0, $vr7, 1 + slli.d $t0, $t0, 3 + fldx.d $fa7, $s1, $a1 + fldx.d $ft4, $s1, $t0 + vfadd.d $vr8, $vr8, 
$vr10 + vextrins.d $vr9, $vr11, 16 + vfadd.d $vr8, $vr8, $vr9 + vextrins.d $vr7, $vr12, 16 + vfadd.d $vr7, $vr8, $vr7 + vfmul.d $vr7, $vr7, $vr0 slli.d $a1, $s3, 3 - vstx $vr8, $fp, $a1 - vaddi.wu $vr7, $vr7, 2 + vstx $vr7, $fp, $a1 + vaddi.wu $vr6, $vr6, 2 addi.d $a6, $a6, -2 addi.w $s3, $s3, 2 bnez $a6, .LBB8_25 @@ -4042,8 +4040,8 @@ restriction_betas: # @restriction_betas bstrpick.d $a0, $t1, 30, 1 slli.d $a0, $a0, 1 st.d $a0, $sp, 216 # 8-byte Folded Spill - vinsgr2vr.w $vr3, $t4, 0 - vinsgr2vr.w $vr3, $t4, 1 + vinsgr2vr.w $vr2, $t4, 0 + vinsgr2vr.w $vr2, $t4, 1 b .LBB8_32 .p2align 4, , 16 .LBB8_31: # %._crit_edge212.split.us.us.us @@ -4102,22 +4100,22 @@ restriction_betas: # @restriction_betas # Parent Loop BB8_33 Depth=3 # => This Inner Loop Header: Depth=4 slli.d $a0, $s0, 3 - fldx.d $fa4, $s1, $a0 + fldx.d $fa3, $s1, $a0 addi.w $a0, $s0, 1 slli.d $a0, $a0, 3 - fldx.d $fa5, $s1, $a0 + fldx.d $fa4, $s1, $a0 add.w $a0, $t4, $s0 slli.d $a1, $a0, 3 - fldx.d $fa6, $s1, $a1 + fldx.d $fa5, $s1, $a1 addi.w $a0, $a0, 1 slli.d $a0, $a0, 3 - fldx.d $fa7, $s1, $a0 - fadd.d $fa4, $fa4, $fa5 - fadd.d $fa4, $fa4, $fa6 - fadd.d $fa4, $fa4, $fa7 - fmul.d $fa4, $fa4, $fa0 + fldx.d $fa6, $s1, $a0 + fadd.d $fa3, $fa3, $fa4 + fadd.d $fa3, $fa3, $fa5 + fadd.d $fa3, $fa3, $fa6 + fmul.d $fa3, $fa3, $fa0 slli.d $a0, $s8, 3 - fstx.d $fa4, $a2, $a0 + fstx.d $fa3, $a2, $a0 addi.w $s0, $s0, 2 addi.d $s3, $s3, -1 addi.w $s8, $s8, 1 @@ -4205,10 +4203,10 @@ restriction_betas: # @restriction_betas mul.d $a0, $s4, $t5 ld.d $a1, $sp, 200 # 8-byte Folded Reload add.d $a0, $a0, $a1 - vinsgr2vr.w $vr4, $a0, 0 - vinsgr2vr.w $vr4, $a0, 1 + vinsgr2vr.w $vr3, $a0, 0 + vinsgr2vr.w $vr3, $a0, 1 ld.d $a0, $sp, 208 # 8-byte Folded Reload - vreplgr2vr.d $vr5, $a0 + vreplgr2vr.d $vr4, $a0 move $s3, $a6 ld.d $s0, $sp, 216 # 8-byte Folded Reload .p2align 4, , 16 @@ -4217,31 +4215,9 @@ restriction_betas: # @restriction_betas # Parent Loop BB8_32 Depth=2 # Parent Loop BB8_33 Depth=3 # => This Inner Loop Header: Depth=4 - vadd.w $vr6, $vr4, $vr5 - vslli.w $vr6, $vr6, 1 - vshuf4i.w $vr7, $vr6, 16 - vslli.d $vr7, $vr7, 32 - vsrai.d $vr7, $vr7, 32 - vpickve2gr.d $a0, $vr7, 0 - slli.d $a0, $a0, 3 - vpickve2gr.d $a1, $vr7, 1 - slli.d $a1, $a1, 3 - fldx.d $fa7, $s1, $a0 - fldx.d $ft0, $s1, $a1 - vbitseti.w $vr9, $vr6, 0 - vshuf4i.w $vr10, $vr9, 16 - vslli.d $vr10, $vr10, 32 - vsrai.d $vr10, $vr10, 32 - vpickve2gr.d $a0, $vr10, 0 - slli.d $a0, $a0, 3 - vpickve2gr.d $a1, $vr10, 1 - slli.d $a1, $a1, 3 - fldx.d $ft2, $s1, $a0 - fldx.d $ft3, $s1, $a1 - vextrins.d $vr7, $vr8, 16 - vextrins.d $vr10, $vr11, 16 - vadd.w $vr6, $vr6, $vr3 - vshuf4i.w $vr6, $vr6, 16 + vadd.w $vr5, $vr3, $vr4 + vslli.w $vr5, $vr5, 1 + vshuf4i.w $vr6, $vr5, 16 vslli.d $vr6, $vr6, 32 vsrai.d $vr6, $vr6, 32 vpickve2gr.d $a0, $vr6, 0 @@ -4249,9 +4225,9 @@ restriction_betas: # @restriction_betas vpickve2gr.d $a1, $vr6, 1 slli.d $a1, $a1, 3 fldx.d $fa6, $s1, $a0 - fldx.d $ft0, $s1, $a1 - vadd.w $vr9, $vr9, $vr3 - vshuf4i.w $vr9, $vr9, 16 + fldx.d $fa7, $s1, $a1 + vbitseti.w $vr8, $vr5, 0 + vshuf4i.w $vr9, $vr8, 16 vslli.d $vr9, $vr9, 32 vsrai.d $vr9, $vr9, 32 vpickve2gr.d $a0, $vr9, 0 @@ -4259,16 +4235,38 @@ restriction_betas: # @restriction_betas vpickve2gr.d $a1, $vr9, 1 slli.d $a1, $a1, 3 fldx.d $ft1, $s1, $a0 - fldx.d $ft3, $s1, $a1 - vfadd.d $vr7, $vr7, $vr10 - vextrins.d $vr6, $vr8, 16 - vfadd.d $vr6, $vr7, $vr6 - vextrins.d $vr9, $vr11, 16 + fldx.d $ft2, $s1, $a1 + vextrins.d $vr6, $vr7, 16 + vextrins.d $vr9, $vr10, 16 + vadd.w $vr5, $vr5, $vr2 + vshuf4i.w $vr5, $vr5, 
16 + vslli.d $vr5, $vr5, 32 + vsrai.d $vr5, $vr5, 32 + vpickve2gr.d $a0, $vr5, 0 + slli.d $a0, $a0, 3 + vpickve2gr.d $a1, $vr5, 1 + slli.d $a1, $a1, 3 + fldx.d $fa5, $s1, $a0 + fldx.d $fa7, $s1, $a1 + vadd.w $vr8, $vr8, $vr2 + vshuf4i.w $vr8, $vr8, 16 + vslli.d $vr8, $vr8, 32 + vsrai.d $vr8, $vr8, 32 + vpickve2gr.d $a0, $vr8, 0 + slli.d $a0, $a0, 3 + vpickve2gr.d $a1, $vr8, 1 + slli.d $a1, $a1, 3 + fldx.d $ft0, $s1, $a0 + fldx.d $ft2, $s1, $a1 vfadd.d $vr6, $vr6, $vr9 - vfmul.d $vr6, $vr6, $vr2 + vextrins.d $vr5, $vr7, 16 + vfadd.d $vr5, $vr6, $vr5 + vextrins.d $vr8, $vr10, 16 + vfadd.d $vr5, $vr5, $vr8 + vfmul.d $vr5, $vr5, $vr0 slli.d $a0, $s3, 3 - vstx $vr6, $a2, $a0 - vaddi.wu $vr5, $vr5, 2 + vstx $vr5, $a2, $a0 + vaddi.wu $vr4, $vr4, 2 addi.d $s0, $s0, -2 addi.w $s3, $s3, 2 bnez $s0, .LBB8_49 @@ -4334,8 +4332,8 @@ restriction_betas: # @restriction_betas bstrpick.d $a0, $t1, 30, 1 slli.d $a0, $a0, 1 st.d $a0, $sp, 216 # 8-byte Folded Spill - vinsgr2vr.w $vr3, $t5, 0 - vinsgr2vr.w $vr3, $t5, 1 + vinsgr2vr.w $vr2, $t5, 0 + vinsgr2vr.w $vr2, $t5, 1 b .LBB8_56 .p2align 4, , 16 .LBB8_55: # %._crit_edge222.split.us.us.us @@ -4394,22 +4392,22 @@ restriction_betas: # @restriction_betas # Parent Loop BB8_57 Depth=3 # => This Inner Loop Header: Depth=4 slli.d $a0, $s2, 3 - fldx.d $fa4, $s1, $a0 + fldx.d $fa3, $s1, $a0 addi.w $a0, $s2, 1 slli.d $a0, $a0, 3 - fldx.d $fa5, $s1, $a0 + fldx.d $fa4, $s1, $a0 add.w $a0, $t5, $s2 slli.d $a1, $a0, 3 - fldx.d $fa6, $s1, $a1 + fldx.d $fa5, $s1, $a1 addi.w $a0, $a0, 1 slli.d $a0, $a0, 3 - fldx.d $fa7, $s1, $a0 - fadd.d $fa4, $fa4, $fa5 - fadd.d $fa4, $fa4, $fa6 - fadd.d $fa4, $fa4, $fa7 - fmul.d $fa4, $fa4, $fa0 + fldx.d $fa6, $s1, $a0 + fadd.d $fa3, $fa3, $fa4 + fadd.d $fa3, $fa3, $fa5 + fadd.d $fa3, $fa3, $fa6 + fmul.d $fa3, $fa3, $fa0 slli.d $a0, $s7, 3 - fstx.d $fa4, $a2, $a0 + fstx.d $fa3, $a2, $a0 addi.w $s2, $s2, 2 addi.d $s3, $s3, -1 addi.w $s7, $s7, 1 @@ -4497,10 +4495,10 @@ restriction_betas: # @restriction_betas mul.d $a0, $s4, $t5 ld.d $a1, $sp, 200 # 8-byte Folded Reload add.d $a0, $a0, $a1 - vinsgr2vr.w $vr4, $a0, 0 - vinsgr2vr.w $vr4, $a0, 1 + vinsgr2vr.w $vr3, $a0, 0 + vinsgr2vr.w $vr3, $a0, 1 ld.d $a0, $sp, 208 # 8-byte Folded Reload - vreplgr2vr.d $vr5, $a0 + vreplgr2vr.d $vr4, $a0 move $s3, $a6 ld.d $s2, $sp, 216 # 8-byte Folded Reload .p2align 4, , 16 @@ -4509,31 +4507,9 @@ restriction_betas: # @restriction_betas # Parent Loop BB8_56 Depth=2 # Parent Loop BB8_57 Depth=3 # => This Inner Loop Header: Depth=4 - vadd.w $vr6, $vr4, $vr5 - vslli.w $vr6, $vr6, 1 - vshuf4i.w $vr7, $vr6, 16 - vslli.d $vr7, $vr7, 32 - vsrai.d $vr7, $vr7, 32 - vpickve2gr.d $a0, $vr7, 0 - slli.d $a0, $a0, 3 - vpickve2gr.d $a1, $vr7, 1 - slli.d $a1, $a1, 3 - fldx.d $fa7, $s1, $a0 - fldx.d $ft0, $s1, $a1 - vbitseti.w $vr9, $vr6, 0 - vshuf4i.w $vr10, $vr9, 16 - vslli.d $vr10, $vr10, 32 - vsrai.d $vr10, $vr10, 32 - vpickve2gr.d $a0, $vr10, 0 - slli.d $a0, $a0, 3 - vpickve2gr.d $a1, $vr10, 1 - slli.d $a1, $a1, 3 - fldx.d $ft2, $s1, $a0 - fldx.d $ft3, $s1, $a1 - vextrins.d $vr7, $vr8, 16 - vextrins.d $vr10, $vr11, 16 - vadd.w $vr6, $vr6, $vr3 - vshuf4i.w $vr6, $vr6, 16 + vadd.w $vr5, $vr3, $vr4 + vslli.w $vr5, $vr5, 1 + vshuf4i.w $vr6, $vr5, 16 vslli.d $vr6, $vr6, 32 vsrai.d $vr6, $vr6, 32 vpickve2gr.d $a0, $vr6, 0 @@ -4541,9 +4517,9 @@ restriction_betas: # @restriction_betas vpickve2gr.d $a1, $vr6, 1 slli.d $a1, $a1, 3 fldx.d $fa6, $s1, $a0 - fldx.d $ft0, $s1, $a1 - vadd.w $vr9, $vr9, $vr3 - vshuf4i.w $vr9, $vr9, 16 + fldx.d $fa7, $s1, $a1 + vbitseti.w $vr8, $vr5, 0 + 
vshuf4i.w $vr9, $vr8, 16 vslli.d $vr9, $vr9, 32 vsrai.d $vr9, $vr9, 32 vpickve2gr.d $a0, $vr9, 0 @@ -4551,16 +4527,38 @@ restriction_betas: # @restriction_betas vpickve2gr.d $a1, $vr9, 1 slli.d $a1, $a1, 3 fldx.d $ft1, $s1, $a0 - fldx.d $ft3, $s1, $a1 - vfadd.d $vr7, $vr7, $vr10 - vextrins.d $vr6, $vr8, 16 - vfadd.d $vr6, $vr7, $vr6 - vextrins.d $vr9, $vr11, 16 + fldx.d $ft2, $s1, $a1 + vextrins.d $vr6, $vr7, 16 + vextrins.d $vr9, $vr10, 16 + vadd.w $vr5, $vr5, $vr2 + vshuf4i.w $vr5, $vr5, 16 + vslli.d $vr5, $vr5, 32 + vsrai.d $vr5, $vr5, 32 + vpickve2gr.d $a0, $vr5, 0 + slli.d $a0, $a0, 3 + vpickve2gr.d $a1, $vr5, 1 + slli.d $a1, $a1, 3 + fldx.d $fa5, $s1, $a0 + fldx.d $fa7, $s1, $a1 + vadd.w $vr8, $vr8, $vr2 + vshuf4i.w $vr8, $vr8, 16 + vslli.d $vr8, $vr8, 32 + vsrai.d $vr8, $vr8, 32 + vpickve2gr.d $a0, $vr8, 0 + slli.d $a0, $a0, 3 + vpickve2gr.d $a1, $vr8, 1 + slli.d $a1, $a1, 3 + fldx.d $ft0, $s1, $a0 + fldx.d $ft2, $s1, $a1 vfadd.d $vr6, $vr6, $vr9 - vfmul.d $vr6, $vr6, $vr2 + vextrins.d $vr5, $vr7, 16 + vfadd.d $vr5, $vr6, $vr5 + vextrins.d $vr8, $vr10, 16 + vfadd.d $vr5, $vr5, $vr8 + vfmul.d $vr5, $vr5, $vr0 slli.d $a0, $s3, 3 - vstx $vr6, $a2, $a0 - vaddi.wu $vr5, $vr5, 2 + vstx $vr5, $a2, $a0 + vaddi.wu $vr4, $vr4, 2 addi.d $s2, $s2, -2 addi.w $s3, $s3, 2 bnez $s2, .LBB8_73 @@ -7055,8 +7053,7 @@ project_cell_to_face: # @project_cell_to_face st.d $a1, $sp, 32 # 8-byte Folded Spill ori $a7, $zero, 7 vldi $vr0, -928 - lu52i.d $a4, $zero, 1022 - xvreplgr2vr.d $xr1, $a4 + xvldi $xr1, -928 # implicit-def: $r13 b .LBB20_3 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s b/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s index 55065458..c808da61 100644 --- a/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s +++ b/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s @@ -37,19 +37,19 @@ .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -352 - st.d $ra, $sp, 344 # 8-byte Folded Spill - st.d $fp, $sp, 336 # 8-byte Folded Spill - st.d $s0, $sp, 328 # 8-byte Folded Spill - st.d $s1, $sp, 320 # 8-byte Folded Spill - st.d $s2, $sp, 312 # 8-byte Folded Spill - st.d $s3, $sp, 304 # 8-byte Folded Spill - st.d $s4, $sp, 296 # 8-byte Folded Spill - st.d $s5, $sp, 288 # 8-byte Folded Spill - st.d $s6, $sp, 280 # 8-byte Folded Spill - st.d $s7, $sp, 272 # 8-byte Folded Spill - st.d $s8, $sp, 264 # 8-byte Folded Spill - fst.d $fs0, $sp, 256 # 8-byte Folded Spill + addi.d $sp, $sp, -336 + st.d $ra, $sp, 328 # 8-byte Folded Spill + st.d $fp, $sp, 320 # 8-byte Folded Spill + st.d $s0, $sp, 312 # 8-byte Folded Spill + st.d $s1, $sp, 304 # 8-byte Folded Spill + st.d $s2, $sp, 296 # 8-byte Folded Spill + st.d $s3, $sp, 288 # 8-byte Folded Spill + st.d $s4, $sp, 280 # 8-byte Folded Spill + st.d $s5, $sp, 272 # 8-byte Folded Spill + st.d $s6, $sp, 264 # 8-byte Folded Spill + st.d $s7, $sp, 256 # 8-byte Folded Spill + st.d $s8, $sp, 248 # 8-byte Folded Spill + fst.d $fs0, $sp, 240 # 8-byte Folded Spill move $s0, $a1 move $fp, $a0 pcalau12i $a0, %got_pc_hi20(stderr) @@ -79,61 +79,61 @@ main: # @main beqz $a0, .LBB0_131 # %bb.2: move $s5, $a0 - addi.d $a0, $sp, 156 + addi.d $a0, $sp, 140 ori $a1, $zero, 99 move $a2, $s5 pcaddu18i $ra, %call36(fgets) jirl $ra, $ra, 0 - addi.d $a0, $sp, 156 + addi.d $a0, $sp, 140 ori $a2, $zero, 10 move $a1, $zero pcaddu18i $ra, %call36(strtol) jirl $ra, $ra, 0 pcalau12i $fp, %pc_hi20(NNWIDTH) st.w $a0, $fp, %pc_lo12(NNWIDTH) - 
addi.d $a0, $sp, 156 + addi.d $a0, $sp, 140 ori $a1, $zero, 99 move $a2, $s5 pcaddu18i $ra, %call36(fgets) jirl $ra, $ra, 0 - addi.d $a0, $sp, 156 + addi.d $a0, $sp, 140 ori $a2, $zero, 10 move $a1, $zero pcaddu18i $ra, %call36(strtol) jirl $ra, $ra, 0 pcalau12i $s0, %pc_hi20(NNHEIGHT) st.w $a0, $s0, %pc_lo12(NNHEIGHT) - addi.d $a0, $sp, 156 + addi.d $a0, $sp, 140 ori $a1, $zero, 99 move $a2, $s5 pcaddu18i $ra, %call36(fgets) jirl $ra, $ra, 0 - addi.d $a0, $sp, 156 + addi.d $a0, $sp, 140 ori $a2, $zero, 10 move $a1, $zero pcaddu18i $ra, %call36(strtol) jirl $ra, $ra, 0 ld.w $a1, $fp, %pc_lo12(NNWIDTH) - st.d $s0, $sp, 128 # 8-byte Folded Spill + st.d $s0, $sp, 112 # 8-byte Folded Spill ld.w $a2, $s0, %pc_lo12(NNHEIGHT) pcalau12i $s0, %pc_hi20(NUMPATS) st.w $a0, $s0, %pc_lo12(NUMPATS) mul.d $a0, $a2, $a1 pcalau12i $a3, %pc_hi20(NNTOT) - st.d $a3, $sp, 112 # 8-byte Folded Spill + st.d $a3, $sp, 96 # 8-byte Folded Spill st.w $a0, $a3, %pc_lo12(NNTOT) pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a0, $a0, %pc_lo12(.L.str.8) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - st.d $s0, $sp, 120 # 8-byte Folded Spill + st.d $s0, $sp, 104 # 8-byte Folded Spill ld.w $s6, $s0, %pc_lo12(NUMPATS) move $a0, $s6 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 move $s1, $a0 pcalau12i $a0, %pc_hi20(vnames) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill st.d $s1, $a0, %pc_lo12(vnames) slli.d $a0, $s6, 2 pcaddu18i $ra, %call36(malloc) @@ -144,7 +144,7 @@ main: # @main # %bb.3: beqz $a0, .LBB0_128 # %bb.4: - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(NNTOT) slli.d $a0, $s0, 3 pcaddu18i $ra, %call36(malloc) @@ -169,8 +169,8 @@ main: # @main addi.d $s2, $s2, 8 bnez $s0, .LBB0_7 .LBB0_9: # %._crit_edge - st.d $s5, $sp, 32 # 8-byte Folded Spill - st.d $s4, $sp, 24 # 8-byte Folded Spill + st.d $s5, $sp, 24 # 8-byte Folded Spill + st.d $s4, $sp, 8 # 8-byte Folded Spill st.d $s3, $sp, 16 # 8-byte Folded Spill slli.d $s4, $s6, 3 move $a0, $s4 @@ -184,7 +184,7 @@ main: # @main jirl $ra, $ra, 0 move $s3, $a0 pcalau12i $a0, %pc_hi20(newvectors) - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill st.d $s3, $a0, %pc_lo12(newvectors) move $a0, $s4 pcaddu18i $ra, %call36(malloc) @@ -198,10 +198,10 @@ main: # @main # %bb.11: # %._crit_edge beqz $s4, .LBB0_127 # %bb.12: # %.preheader - st.d $s6, $sp, 64 # 8-byte Folded Spill + st.d $s6, $sp, 32 # 8-byte Folded Spill blez $s6, .LBB0_18 # %bb.13: # %.lr.ph67 - ld.d $s0, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 32 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_14: # =>This Inner Loop Header: Depth=1 move $a0, $s1 @@ -237,8 +237,8 @@ main: # @main beqz $a0, .LBB0_127 # %bb.19: # %.preheader.i move $s1, $a0 - ld.d $s6, $sp, 32 # 8-byte Folded Reload - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload blez $a0, .LBB0_28 # %bb.20: # %.lr.ph30.i.preheader pcalau12i $a0, %pc_hi20(.L.str.18) @@ -248,7 +248,7 @@ main: # @main .p2align 4, , 16 .LBB0_21: # %._crit_edge28.i # in Loop: Header=BB0_22 Depth=1 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) addi.d $s2, $s2, 1 bge $s2, $a0, .LBB0_28 @@ -262,10 +262,10 @@ main: # @main pcaddu18i $ra, %call36(__isoc99_fscanf) jirl $ra, $ra, 0 ld.b $a0, $s1, 0 - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload ld.d $a1, $a1, 
%pc_lo12(vnames) stx.b $a0, $a1, $s2 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NNHEIGHT) blez $a0, .LBB0_21 # %bb.23: # %.lr.ph27.i.preheader @@ -277,7 +277,7 @@ main: # @main .p2align 4, , 16 .LBB0_24: # %._crit_edge.i # in Loop: Header=BB0_25 Depth=2 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NNHEIGHT) addi.w $s4, $s4, 1 bge $s4, $a0, .LBB0_21 @@ -326,11 +326,11 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) blez $a0, .LBB0_64 # %bb.29: # %.lr.ph39.i - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.w $a2, $a1, %pc_lo12(NNTOT) blez $a2, .LBB0_49 # %bb.30: # %.lr.ph39.split.us.preheader.i @@ -631,7 +631,7 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.1) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(NNTOT) blez $s0, .LBB0_75 # %bb.65: # %.preheader31.lr.ph.i @@ -714,7 +714,7 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.2) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NNTOT) slli.d $a0, $a0, 2 pcaddu18i $ra, %call36(malloc) @@ -722,7 +722,7 @@ main: # @main beqz $a0, .LBB0_127 # %bb.76: # %.preheader36.i move $fp, $a0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) blez $a0, .LBB0_106 # %bb.77: # %.preheader36.split.i.preheader @@ -732,13 +732,10 @@ main: # @main ori $s3, $zero, 3 movgr2fr.w $fs0, $zero ori $s4, $zero, 16 - lu12i.w $a1, 258048 - vreplgr2vr.w $vr7, $a1 vrepli.b $vr8, 0 - xvreplgr2vr.w $xr9, $a1 + xvldi $xr9, -3265 xvrepli.b $xr10, 0 - vst $vr7, $sp, 96 # 16-byte Folded Spill - vst $vr8, $sp, 128 # 16-byte Folded Spill + vst $vr8, $sp, 112 # 16-byte Folded Spill xvst $xr9, $sp, 64 # 32-byte Folded Spill xvst $xr10, $sp, 32 # 32-byte Folded Spill b .LBB0_79 @@ -765,7 +762,7 @@ main: # @main move $s6, $a4 .LBB0_82: # %._crit_edge43.split.us.i # in Loop: Header=BB0_83 Depth=2 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) addi.d $s5, $s5, 1 bge $s5, $a0, .LBB0_78 @@ -778,7 +775,7 @@ main: # @main # Child Loop BB0_102 Depth 3 # Child Loop BB0_104 Depth 4 ld.d $a0, $s8, %pc_lo12(vectors) - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(newvectors) ld.w $a2, $s0, %pc_lo12(nmode) slli.d $s1, $s5, 3 @@ -794,19 +791,19 @@ main: # @main pcaddu18i $ra, %call36(runcont) jirl $ra, $ra, 0 .LBB0_86: # in Loop: Header=BB0_83 Depth=2 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NNTOT) vldi $vr6, -1184 - vld $vr8, $sp, 128 # 16-byte Folded Reload + vld $vr8, $sp, 112 # 16-byte Folded Reload blez $a0, .LBB0_82 # %bb.87: # %iter.check164 # in Loop: Header=BB0_83 Depth=2 ld.d $a1, $s8, %pc_lo12(vectors) - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(newvectors) ldx.d $a1, $a1, $s1 ldx.d $a2, $a2, $s1 - vld $vr7, $sp, 96 # 16-byte Folded Reload + vldi $vr7, -3265 xvld $xr9, $sp, 64 # 32-byte 
Folded Reload xvld $xr10, $sp, 32 # 32-byte Folded Reload bltu $s3, $a0, .LBB0_89 @@ -1021,7 +1018,7 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.3) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) ld.d $s3, $sp, 16 # 8-byte Folded Reload blez $a0, .LBB0_125 @@ -1029,7 +1026,7 @@ main: # @main move $fp, $zero move $s1, $zero vrepli.b $vr0, 0 - vst $vr0, $sp, 128 # 16-byte Folded Spill + vst $vr0, $sp, 112 # 16-byte Folded Spill ori $s2, $zero, 2 b .LBB0_110 .p2align 4, , 16 @@ -1037,7 +1034,7 @@ main: # @main pcaddu18i $ra, %call36(runcont) jirl $ra, $ra, 0 .LBB0_109: # in Loop: Header=BB0_110 Depth=1 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) addi.d $s1, $s1, 1 addi.d $fp, $fp, 8 @@ -1045,7 +1042,7 @@ main: # @main .LBB0_110: # %.lr.ph.i55 # =>This Inner Loop Header: Depth=1 ld.d $a0, $s8, %pc_lo12(vectors) - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(newvectors) ld.w $a2, $s0, %pc_lo12(nmode) ldx.d $a0, $a0, $fp @@ -1074,7 +1071,7 @@ main: # @main move $a1, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) addi.d $s0, $s0, 1 bge $s0, $a0, .LBB0_125 @@ -1082,14 +1079,14 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB0_119 Depth 2 # Child Loop BB0_122 Depth 2 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(NNTOT) slli.d $a0, $s0, 2 blez $a1, .LBB0_114 # %bb.116: # %.lr.ph.preheader.i.i # in Loop: Header=BB0_115 Depth=1 ld.d $a2, $s8, %pc_lo12(vectors) - ld.d $a3, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $sp, 128 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(newvectors) slli.d $a4, $s0, 3 ldx.d $a2, $a2, $a4 @@ -1107,7 +1104,7 @@ main: # @main addi.d $a5, $a3, 16 addi.d $a6, $a2, 16 move $a7, $a4 - vld $vr1, $sp, 128 # 16-byte Folded Reload + vld $vr1, $sp, 112 # 16-byte Folded Reload vori.b $vr0, $vr1, 0 .p2align 4, , 16 .LBB0_119: # %vector.body208 @@ -1163,32 +1160,32 @@ main: # @main move $a1, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(NUMPATS) addi.d $s0, $s0, 1 blt $s0, $a0, .LBB0_115 .LBB0_125: # %storecheck.exit move $a0, $zero - fld.d $fs0, $sp, 256 # 8-byte Folded Reload - ld.d $s8, $sp, 264 # 8-byte Folded Reload - ld.d $s7, $sp, 272 # 8-byte Folded Reload - ld.d $s6, $sp, 280 # 8-byte Folded Reload - ld.d $s5, $sp, 288 # 8-byte Folded Reload - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ld.d $s3, $sp, 304 # 8-byte Folded Reload - ld.d $s2, $sp, 312 # 8-byte Folded Reload - ld.d $s1, $sp, 320 # 8-byte Folded Reload - ld.d $s0, $sp, 328 # 8-byte Folded Reload - ld.d $fp, $sp, 336 # 8-byte Folded Reload - ld.d $ra, $sp, 344 # 8-byte Folded Reload - addi.d $sp, $sp, 352 + fld.d $fs0, $sp, 240 # 8-byte Folded Reload + ld.d $s8, $sp, 248 # 8-byte Folded Reload + ld.d $s7, $sp, 256 # 8-byte Folded Reload + ld.d $s6, $sp, 264 # 8-byte Folded Reload + ld.d $s5, $sp, 272 # 8-byte Folded Reload + ld.d $s4, $sp, 280 # 8-byte Folded Reload + ld.d $s3, $sp, 288 # 8-byte Folded Reload + ld.d $s2, $sp, 296 # 8-byte Folded Reload + ld.d $s1, $sp, 304 # 8-byte Folded Reload + ld.d $s0, $sp, 312 # 8-byte Folded 
Reload + ld.d $fp, $sp, 320 # 8-byte Folded Reload + ld.d $ra, $sp, 328 # 8-byte Folded Reload + addi.d $sp, $sp, 336 ret .LBB0_126: slli.d $a3, $a0, 1 andi $a3, $a3, 30 b .LBB0_60 .LBB0_127: - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 8 # 8-byte Folded Reload ld.d $a3, $a0, 0 b .LBB0_129 .LBB0_128: diff --git a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s index cefd0dd8..7b0512a2 100644 --- a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s +++ b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s @@ -120,8 +120,7 @@ makewt: # @makewt # kill: def $f0_64 killed $f0_64 def $vr0 vld $vr1, $sp, 16 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - lu52i.d $a0, $zero, 1022 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -928 vfdiv.d $vr0, $vr0, $vr1 ori $a0, $zero, 10 vst $vr0, $fp, 16 @@ -169,8 +168,7 @@ makewt: # @makewt addi.d $a0, $fp, 32 addi.d $a1, $fp, 64 ori $a2, $zero, 8 - lu52i.d $a3, $zero, 1022 - vreplgr2vr.d $vr0, $a3 + vldi $vr0, -928 ori $a3, $zero, 10 ori $a4, $zero, 6 b .LBB1_8 diff --git a/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/object.s b/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/object.s index 5f1c1842..30603095 100644 --- a/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/object.s +++ b/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/object.s @@ -18,9 +18,9 @@ Oalloc: # @Oalloc move $a1, $fp pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -912 vst $vr0, $s0, 104 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 120 xvrepli.b $xr1, 0 xvst $xr1, $s0, 128 diff --git a/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/splay2.s b/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/splay2.s index a7deb6e2..0a8b4596 100644 --- a/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/splay2.s +++ b/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/splay2.s @@ -696,8 +696,7 @@ CHdelete: # @CHdelete pcaddu18i $ra, %call36(CHsplay) jirl $ra, $ra, 0 st.d $zero, $s0, 32 - lu52i.d $a0, $zero, -1025 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -784 vst $vr0, $sp, 16 addi.w $a0, $zero, -1 lu32i.d $a0, 0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s index a5eedc48..297b11fd 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s @@ -196,8 +196,7 @@ start_pass_fdctmgr: # @start_pass_fdctmgr fld.d $fs1, $a0, %pc_lo12(.LCPI1_1) pcalau12i $a0, %pc_hi20(.LCPI1_2) fld.d $fs2, $a0, %pc_lo12(.LCPI1_2) - lu52i.d $a0, $zero, 1026 - xvreplgr2vr.d $xr6, $a0 + xvldi $xr6, -992 pcalau12i $a0, %pc_hi20(.LCPI1_3) fld.d $fs3, $a0, %pc_lo12(.LCPI1_3) pcalau12i $a0, %pc_hi20(.LCPI1_4) @@ -205,8 +204,7 @@ start_pass_fdctmgr: # @start_pass_fdctmgr pcalau12i $a0, %pc_hi20(.LCPI1_5) fld.d $fs5, $a0, %pc_lo12(.LCPI1_5) ori $s8, $zero, 64 - ori $a0, $zero, 1024 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3836 xvst $xr0, $sp, 32 # 32-byte Folded Spill xvst $xr6, $sp, 96 # 32-byte Folded Spill b .LBB1_4 @@ -1440,14 +1438,14 @@ forward_DCT_float: # @forward_DCT_float # %bb.0: beqz $a6, 
.LBB3_4 # %bb.1: # %.preheader.preheader - addi.d $sp, $sp, -384 - st.d $ra, $sp, 376 # 8-byte Folded Spill - st.d $fp, $sp, 368 # 8-byte Folded Spill - st.d $s0, $sp, 360 # 8-byte Folded Spill - st.d $s1, $sp, 352 # 8-byte Folded Spill - st.d $s2, $sp, 344 # 8-byte Folded Spill - st.d $s3, $sp, 336 # 8-byte Folded Spill - st.d $s4, $sp, 328 # 8-byte Folded Spill + addi.d $sp, $sp, -368 + st.d $ra, $sp, 360 # 8-byte Folded Spill + st.d $fp, $sp, 352 # 8-byte Folded Spill + st.d $s0, $sp, 344 # 8-byte Folded Spill + st.d $s1, $sp, 336 # 8-byte Folded Spill + st.d $s2, $sp, 328 # 8-byte Folded Spill + st.d $s3, $sp, 320 # 8-byte Folded Spill + st.d $s4, $sp, 312 # 8-byte Folded Spill move $fp, $a5 ld.d $a0, $a0, 480 ld.w $a1, $a1, 16 @@ -1461,10 +1459,7 @@ forward_DCT_float: # @forward_DCT_float lu12i.w $a0, 288768 ori $a0, $a0, 256 xvreplgr2vr.w $xr0, $a0 - xvst $xr0, $sp, 32 # 32-byte Folded Spill - lu12i.w $a0, 12 - vreplgr2vr.h $vr0, $a0 - vst $vr0, $sp, 16 # 16-byte Folded Spill + xvst $xr0, $sp, 16 # 32-byte Folded Spill .p2align 4, , 16 .LBB3_2: # %.preheader # =>This Inner Loop Header: Depth=1 @@ -1475,342 +1470,342 @@ forward_DCT_float: # @forward_DCT_float addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 72 + fst.s $fa0, $sp, 56 ld.bu $a2, $a1, 1 addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 76 + fst.s $fa0, $sp, 60 ld.bu $a2, $a1, 2 addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 80 + fst.s $fa0, $sp, 64 ld.bu $a2, $a1, 3 addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 84 + fst.s $fa0, $sp, 68 ld.bu $a2, $a1, 4 addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 88 + fst.s $fa0, $sp, 72 ld.bu $a2, $a1, 5 addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 92 + fst.s $fa0, $sp, 76 ld.bu $a2, $a1, 6 addi.d $a2, $a2, -128 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 96 + fst.s $fa0, $sp, 80 ld.bu $a1, $a1, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 8 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 100 + fst.s $fa0, $sp, 84 ldx.bu $a1, $a2, $a0 add.d $a2, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 104 + fst.s $fa0, $sp, 88 ld.bu $a1, $a2, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 108 + fst.s $fa0, $sp, 92 ld.bu $a1, $a2, 2 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 112 + fst.s $fa0, $sp, 96 ld.bu $a1, $a2, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 116 + fst.s $fa0, $sp, 100 ld.bu $a1, $a2, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 120 + fst.s $fa0, $sp, 104 ld.bu $a1, $a2, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 124 + fst.s $fa0, $sp, 108 ld.bu $a1, $a2, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 128 + fst.s $fa0, $sp, 112 ld.bu $a1, $a2, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 16 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 132 + fst.s $fa0, $sp, 116 ldx.bu $a1, $a2, $a0 add.d $a2, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 136 + fst.s $fa0, $sp, 120 ld.bu $a1, $a2, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 140 + fst.s $fa0, $sp, 124 ld.bu $a1, $a2, 2 
addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 144 + fst.s $fa0, $sp, 128 ld.bu $a1, $a2, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 148 + fst.s $fa0, $sp, 132 ld.bu $a1, $a2, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 152 + fst.s $fa0, $sp, 136 ld.bu $a1, $a2, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 156 + fst.s $fa0, $sp, 140 ld.bu $a1, $a2, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 160 + fst.s $fa0, $sp, 144 ld.bu $a1, $a2, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 24 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 164 + fst.s $fa0, $sp, 148 ldx.bu $a1, $a2, $a0 add.d $a2, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 168 + fst.s $fa0, $sp, 152 ld.bu $a1, $a2, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 172 + fst.s $fa0, $sp, 156 ld.bu $a1, $a2, 2 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 176 + fst.s $fa0, $sp, 160 ld.bu $a1, $a2, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 180 + fst.s $fa0, $sp, 164 ld.bu $a1, $a2, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 184 + fst.s $fa0, $sp, 168 ld.bu $a1, $a2, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 188 + fst.s $fa0, $sp, 172 ld.bu $a1, $a2, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 192 + fst.s $fa0, $sp, 176 ld.bu $a1, $a2, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 32 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 196 + fst.s $fa0, $sp, 180 ldx.bu $a1, $a2, $a0 add.d $a2, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 200 + fst.s $fa0, $sp, 184 ld.bu $a1, $a2, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 204 + fst.s $fa0, $sp, 188 ld.bu $a1, $a2, 2 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 208 + fst.s $fa0, $sp, 192 ld.bu $a1, $a2, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 212 + fst.s $fa0, $sp, 196 ld.bu $a1, $a2, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 216 + fst.s $fa0, $sp, 200 ld.bu $a1, $a2, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 220 + fst.s $fa0, $sp, 204 ld.bu $a1, $a2, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 224 + fst.s $fa0, $sp, 208 ld.bu $a1, $a2, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 40 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 228 + fst.s $fa0, $sp, 212 ldx.bu $a1, $a2, $a0 add.d $a2, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 232 + fst.s $fa0, $sp, 216 ld.bu $a1, $a2, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 236 + fst.s $fa0, $sp, 220 ld.bu $a1, $a2, 2 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 240 + fst.s $fa0, $sp, 224 ld.bu $a1, $a2, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 244 + fst.s $fa0, $sp, 228 ld.bu $a1, $a2, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 248 + fst.s $fa0, 
$sp, 232 ld.bu $a1, $a2, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 252 + fst.s $fa0, $sp, 236 ld.bu $a1, $a2, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 256 + fst.s $fa0, $sp, 240 ld.bu $a1, $a2, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 48 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 260 + fst.s $fa0, $sp, 244 ldx.bu $a1, $a2, $a0 add.d $a2, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 264 + fst.s $fa0, $sp, 248 ld.bu $a1, $a2, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 268 + fst.s $fa0, $sp, 252 ld.bu $a1, $a2, 2 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 272 + fst.s $fa0, $sp, 256 ld.bu $a1, $a2, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 276 + fst.s $fa0, $sp, 260 ld.bu $a1, $a2, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 280 + fst.s $fa0, $sp, 264 ld.bu $a1, $a2, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 284 + fst.s $fa0, $sp, 268 ld.bu $a1, $a2, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 288 + fst.s $fa0, $sp, 272 ld.bu $a1, $a2, 7 addi.d $a1, $a1, -128 ld.d $a2, $s2, 56 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 292 + fst.s $fa0, $sp, 276 ldx.bu $a1, $a2, $a0 add.d $a0, $a2, $a0 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 296 + fst.s $fa0, $sp, 280 ld.bu $a1, $a0, 1 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 300 + fst.s $fa0, $sp, 284 ld.bu $a1, $a0, 2 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 304 + fst.s $fa0, $sp, 288 ld.bu $a1, $a0, 3 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 308 + fst.s $fa0, $sp, 292 ld.bu $a1, $a0, 4 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 312 + fst.s $fa0, $sp, 296 ld.bu $a1, $a0, 5 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 316 + fst.s $fa0, $sp, 300 ld.bu $a1, $a0, 6 addi.d $a1, $a1, -128 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 320 + fst.s $fa0, $sp, 304 ld.bu $a0, $a0, 7 addi.d $a0, $a0, -128 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 - fst.s $fa0, $sp, 324 - addi.d $a0, $sp, 72 + fst.s $fa0, $sp, 308 + addi.d $a0, $sp, 56 jirl $ra, $s0, 0 - xvld $xr0, $sp, 72 + xvld $xr0, $sp, 56 xvld $xr1, $s1, 0 xvfmul.s $xr0, $xr0, $xr1 - xvld $xr3, $sp, 32 # 32-byte Folded Reload + xvld $xr3, $sp, 16 # 32-byte Folded Reload xvfadd.s $xr0, $xr0, $xr3 xvftintrz.w.s $xr0, $xr0 xvpickve2gr.w $a0, $xr0, 0 @@ -1828,10 +1823,10 @@ forward_DCT_float: # @forward_DCT_float xvpickve2gr.w $a0, $xr0, 6 vinsgr2vr.h $vr1, $a0, 6 xvpickve2gr.w $a0, $xr0, 7 - xvld $xr0, $sp, 104 + xvld $xr0, $sp, 88 xvld $xr2, $s1, 32 vinsgr2vr.h $vr1, $a0, 7 - vld $vr4, $sp, 16 # 16-byte Folded Reload + vldi $vr4, -2624 vadd.h $vr1, $vr1, $vr4 vst $vr1, $s4, -64 xvfmul.s $xr0, $xr0, $xr2 @@ -1852,7 +1847,7 @@ forward_DCT_float: # @forward_DCT_float xvpickve2gr.w $a0, $xr0, 6 vinsgr2vr.h $vr1, $a0, 6 xvpickve2gr.w $a0, $xr0, 7 - xvld $xr0, $sp, 136 + xvld $xr0, $sp, 120 xvld $xr2, $s1, 64 vinsgr2vr.h $vr1, $a0, 7 vadd.h $vr1, $vr1, $vr4 @@ -1875,7 +1870,7 @@ forward_DCT_float: # @forward_DCT_float xvpickve2gr.w $a0, $xr0, 6 vinsgr2vr.h 
$vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 168
+ xvld $xr0, $sp, 152
 xvld $xr2, $s1, 96
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1898,7 +1893,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 200
+ xvld $xr0, $sp, 184
 xvld $xr2, $s1, 128
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1921,7 +1916,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 232
+ xvld $xr0, $sp, 216
 xvld $xr2, $s1, 160
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1944,7 +1939,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 264
+ xvld $xr0, $sp, 248
 xvld $xr2, $s1, 192
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1967,7 +1962,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 296
+ xvld $xr0, $sp, 280
 xvld $xr2, $s1, 224
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1998,14 +1993,14 @@ forward_DCT_float: # @forward_DCT_float
 addi.d $s4, $s4, 128
 bnez $s3, .LBB3_2
 # %bb.3:
- ld.d $s4, $sp, 328 # 8-byte Folded Reload
- ld.d $s3, $sp, 336 # 8-byte Folded Reload
- ld.d $s2, $sp, 344 # 8-byte Folded Reload
- ld.d $s1, $sp, 352 # 8-byte Folded Reload
- ld.d $s0, $sp, 360 # 8-byte Folded Reload
- ld.d $fp, $sp, 368 # 8-byte Folded Reload
- ld.d $ra, $sp, 376 # 8-byte Folded Reload
- addi.d $sp, $sp, 384
+ ld.d $s4, $sp, 312 # 8-byte Folded Reload
+ ld.d $s3, $sp, 320 # 8-byte Folded Reload
+ ld.d $s2, $sp, 328 # 8-byte Folded Reload
+ ld.d $s1, $sp, 336 # 8-byte Folded Reload
+ ld.d $s0, $sp, 344 # 8-byte Folded Reload
+ ld.d $fp, $sp, 352 # 8-byte Folded Reload
+ ld.d $ra, $sp, 360 # 8-byte Folded Reload
+ addi.d $sp, $sp, 368
 .LBB3_4: # %._crit_edge
 ret
 .Lfunc_end3:
diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s
index ca5bfa1e..293a54a9 100644
--- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s
+++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s
@@ -177,8 +177,6 @@ start_pass: # @start_pass
 pcalau12i $a0, %got_pc_hi20(jpeg_idct_1x1)
 ld.d $s3, $a0, %got_pc_lo12(jpeg_idct_1x1)
 ori $s4, $zero, 7
- vrepli.b $vr0, 0
- vst $vr0, $sp, 272 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_0)
 fld.d $fs0, $a0, %pc_lo12(.LCPI1_0)
 pcalau12i $a0, %pc_hi20(.LCPI1_1)
@@ -193,27 +191,28 @@ start_pass: # @start_pass
 fld.d $fs5, $a0, %pc_lo12(.LCPI1_5)
 pcalau12i $a0, %pc_hi20(.LCPI1_6)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_6)
- xvst $xr0, $sp, 240 # 32-byte Folded Spill
+ xvst $xr0, $sp, 256 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_7)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_7)
- xvst $xr0, $sp, 208 # 32-byte Folded Spill
+ xvst $xr0, $sp, 224 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_8)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_8)
- xvst $xr0, $sp, 176 # 32-byte Folded Spill
+ xvst $xr0, $sp, 192 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_9)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_9)
- xvst $xr0, $sp, 144 # 32-byte Folded Spill
+ xvst $xr0, $sp, 160 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_10)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_10)
- xvst $xr0, $sp, 112 # 32-byte Folded Spill
+ xvst $xr0, $sp, 128 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_11)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_11)
- xvst $xr0, $sp, 80 # 32-byte Folded Spill
+ xvst $xr0, $sp, 96 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_12)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_12)
- xvst $xr0, $sp, 48 # 32-byte Folded Spill
- ori $a0, $zero, 2048
- xvreplgr2vr.w $xr0, $a0
+ xvst $xr0, $sp, 64 # 32-byte Folded Spill
+ vrepli.b $vr0, 0
+ vst $vr0, $sp, 48 # 16-byte Folded Spill
+ xvldi $xr0, -3832
 xvst $xr0, $sp, 16 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LJTI1_0)
 addi.d $s7, $a0, %pc_lo12(.LJTI1_0)
@@ -230,7 +229,7 @@ start_pass: # @start_pass
 ld.d $a5, $a3, 8
 vinsgr2vr.d $vr0, $a4, 0
 vinsgr2vr.d $vr1, $a5, 0
- vld $vr2, $sp, 272 # 16-byte Folded Reload
+ vld $vr2, $sp, 48 # 16-byte Folded Reload
 vilvl.h $vr0, $vr2, $vr0
 vilvl.h $vr1, $vr2, $vr1
 ld.d $a4, $a3, 16
@@ -813,7 +812,7 @@ start_pass: # @start_pass
 xvld $xr5, $sp, 16 # 32-byte Folded Reload
 xvori.b $xr0, $xr5, 0
 vld $vr2, $a3, 16
- xvld $xr3, $sp, 240 # 32-byte Folded Reload
+ xvld $xr3, $sp, 256 # 32-byte Folded Reload
 xvmadd.w $xr0, $xr1, $xr3
 xvsrli.w $xr0, $xr0, 12
 xvst $xr0, $a2, 0
@@ -843,7 +842,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 32
- xvld $xr4, $sp, 208 # 32-byte Folded Reload
+ xvld $xr4, $sp, 224 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr4
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 32
@@ -873,7 +872,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 48
- xvld $xr4, $sp, 176 # 32-byte Folded Reload
+ xvld $xr4, $sp, 192 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr4
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 64
@@ -903,7 +902,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 64
- xvld $xr4, $sp, 144 # 32-byte Folded Reload
+ xvld $xr4, $sp, 160 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr4
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 96
@@ -962,7 +961,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 96
- xvld $xr3, $sp, 112 # 32-byte Folded Reload
+ xvld $xr3, $sp, 128 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr3
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 160
@@ -992,7 +991,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 112
- xvld $xr3, $sp, 80 # 32-byte Folded Reload
+ xvld $xr3, $sp, 96 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr3
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 192
@@ -1021,7 +1020,7 @@ start_pass: # @start_pass
 bstrpick.d $a3, $a3, 15, 0
 xvinsgr2vr.w $xr0, $a3, 7
 xvori.b $xr1, $xr5, 0
- xvld $xr2, $sp, 48 # 32-byte Folded Reload
+ xvld $xr2, $sp, 64 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr2
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 224
diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s
index eb256408..3ff41c7f 100644
--- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s
+++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s
@@ -1756,8 +1756,7 @@ mdct_init48: # @mdct_init48
 xvld $xr0, $a0, %pc_lo12(.LCPI1_10)
 pcalau12i $a0, %pc_hi20(.LCPI1_11)
 vld $vr1, $a0, %pc_lo12(.LCPI1_11)
- lu52i.d $a0, $zero, 1023
- xvreplgr2vr.d $xr2, $a0
+ xvldi $xr2, -912
 xvst $xr2, $fp, 448
 xvst $xr0, $fp, 480
 vst $vr1, $fp, 512
diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s
index ae36840b..bd259c2f 100644
--- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s
+++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s
@@ -974,8 +974,7 @@ L3psycho_anal: # @L3psycho_anal
 move $s4, $zero
 move $a1, $zero
 ori $s8, $zero, 1
- lu12i.w $a0, 258048
- xvreplgr2vr.w $xr7, $a0
+ xvldi $xr7, -3265
 movgr2fr.w $fa0, $zero
 lu12i.w $a0, -1
 ori $a0, $a0, 2044
diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s
index 2b697734..8c3c0d52 100644
--- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s
+++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s
@@ -1335,15 +1335,13 @@ VBR_iteration_loop: # @VBR_iteration_loop
 ld.d $a2, $sp, 24 # 8-byte Folded Reload
 addi.d $a2, $a2, 32
 addi.d $a3, $sp, 384
- lu52i.d $a4, $zero, 1022
- xvreplgr2vr.d $xr2, $a4
+ xvldi $xr2, -928
 lu12i.w $a4, 335544
 ori $a4, $a4, 1311
 lu32i.d $a4, 335544
 lu52i.d $a4, $a4, 1021
 xvreplgr2vr.d $xr3, $a4
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr4, $a4
+ xvldi $xr4, -912
 vrepli.w $vr5, 125
 move $a4, $a1
 .p2align 4, , 16
diff --git a/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/long_term.s b/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/long_term.s
index 55b9bbd2..c6ff93fe 100644
--- a/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/long_term.s
+++ b/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/long_term.s
@@ -39,26 +39,26 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 move $s4, $a1
 vld $vr0, $a1, 16
 vslti.h $vr1, $vr0, 0
- lu12i.w $a0, 8
- vreplgr2vr.h $vr2, $a0
+ vldi $vr2, -2688
 vseq.h $vr3, $vr0, $vr2
 vneg.h $vr4, $vr0
 lu12i.w $a0, 7
- ori $a1, $a0, 4095
- vld $vr5, $s4, 48
- vreplgr2vr.h $vr6, $a1
+ ori $a0, $a0, 4095
+ vld $vr5, $a1, 48
+ st.d $a0, $sp, 8 # 8-byte Folded Spill
+ vreplgr2vr.h $vr6, $a0
 vbitsel.v $vr3, $vr4, $vr6, $vr3
 vbitsel.v $vr0, $vr0, $vr3, $vr1
 vslti.h $vr1, $vr5, 0
 vseq.h $vr3, $vr5, $vr2
 vneg.h $vr4, $vr5
- vld $vr7, $s4, 0
+ vld $vr7, $a1, 0
 vbitsel.v $vr3, $vr4, $vr6, $vr3
 vbitsel.v $vr1, $vr5, $vr3, $vr1
 vmax.h $vr0, $vr0, $vr1
 vslti.h $vr1, $vr7, 0
 vseq.h $vr3, $vr7, $vr2
- vld $vr4, $s4, 32
+ vld $vr4, $a1, 32
 vneg.h $vr5, $vr7
 vbitsel.v $vr3, $vr5, $vr6, $vr3
 vbitsel.v $vr1, $vr7, $vr3, $vr1
@@ -75,7 +75,7 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vbsrl.v $vr1, $vr0, 4
 vmax.h $vr0, $vr1, $vr0
 vbsrl.v $vr1, $vr0, 2
- vld $vr3, $s4, 64
+ vld $vr3, $a1, 64
 vmax.h $vr0, $vr1, $vr0
 vpickve2gr.h $a0, $vr0, 0
 vreplgr2vr.h $vr0, $a0
@@ -97,7 +97,6 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 st.d $a4, $sp, 48 # 8-byte Folded Spill
 st.d $a3, $sp, 56 # 8-byte Folded Spill
 move $s5, $a2
- st.d $a1, $sp, 8 # 8-byte Folded Spill
 st.d $a5, $sp, 32 # 8-byte Folded Spill
 beqz $a0, .LBB0_2
 # %bb.1:
@@ -587,10 +586,8 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
- ld.d $a1, $sp, 8 # 8-byte Folded Reload
- xvreplgr2vr.w $xr1, $a1
+ xvldi $xr0, -3776
+ xvldi $xr1, -2433
 xvori.b $xr3, $xr0, 0
 xvmadd.w $xr3, $xr2, $xr1
 xvsrli.w $xr2, $xr3, 15
@@ -872,10 +869,9 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
 ori $a1, $zero, 3277
 .LBB0_22: # %Long_term_analysis_filtering.exit
+ xvldi $xr0, -3776
 xvreplgr2vr.w $xr1, $a1
 xvori.b $xr3, $xr0, 0
 xvmadd.w $xr3, $xr2, $xr1
@@ -1236,8 +1232,6 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
 lu12i.w $a1, 2
 ori $a1, $a1, 3277
 b .LBB0_22
@@ -1329,8 +1323,6 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
 lu12i.w $a1, 5
 ori $a1, $a1, 819
 b .LBB0_22
diff --git a/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/lpc.s b/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/lpc.s
index 4605df4f..da60a3aa 100644
--- a/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/lpc.s
+++ b/results/MultiSource/Benchmarks/MiBench/telecomm-gsm/CMakeFiles/telecomm-gsm.dir/lpc.s
@@ -34,15 +34,14 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 xvld $xr2, $a1, 32
 xvslti.h $xr3, $xr1, 0
 xvslti.h $xr4, $xr2, 0
- lu12i.w $a0, 8
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2688
 xvseq.h $xr5, $xr1, $xr0
 xvseq.h $xr6, $xr2, $xr0
 xvneg.h $xr7, $xr1
 xvneg.h $xr8, $xr2
 lu12i.w $a0, 7
- ori $s4, $a0, 4095
- xvreplgr2vr.h $xr9, $s4
+ ori $s3, $a0, 4095
+ xvreplgr2vr.h $xr9, $s3
 xvbitsel.v $xr5, $xr7, $xr9, $xr5
 xvbitsel.v $xr6, $xr8, $xr9, $xr6
 xvld $xr7, $a1, 64
@@ -138,8 +137,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 jr $a0
 .LBB0_4: # %vector.body108.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_5: # %vector.body108
@@ -177,8 +175,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 b .LBB0_14
 .LBB0_8: # %vector.body124.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_9: # %vector.body124
@@ -209,8 +206,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 b .LBB0_14
 .LBB0_10: # %vector.body132.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_11: # %vector.body132
@@ -241,8 +237,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 b .LBB0_14
 .LBB0_12: # %vector.body116.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_13: # %vector.body116
@@ -718,6 +713,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 vpackev.d $vr16, $vr16, $vr17
 vst $vr16, $s0, 304
 .LBB0_18: # %Autocorrelation.exit
+ lu12i.w $ra, 8
 st.d $s2, $sp, 16 # 8-byte Folded Spill
 beqz $s1, .LBB0_21
 # %bb.19: # %.preheader69.preheader.i
@@ -748,7 +744,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 slli.d $fp, $a7, 1
 slli.d $s0, $a6, 1
 slli.d $s2, $a5, 1
- slli.d $s3, $a4, 1
+ slli.d $s4, $a4, 1
 slli.d $s6, $a3, 1
 slli.d $s7, $a2, 1
 slli.d $s8, $a1, 1
@@ -769,7 +765,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 srli.d $a6, $a6, 16
 sll.d $a7, $s6, $a0
 srli.d $a7, $a7, 16
- sll.d $t0, $s3, $a0
+ sll.d $t0, $s4, $a0
 srli.d $t0, $t0, 16
 sll.d $t1, $s2, $a0
 ld.d $t3, $sp, 16 # 8-byte Folded Reload
@@ -799,7 +795,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 sltui $a2, $a2, 1
 sub.d $a4, $zero, $s5
 masknez $a4, $a4, $a2
- maskeqz $a2, $s4, $a2
+ maskeqz $a2, $s3, $a2
 or $a2, $a2, $a4
 masknez $a4, $s5, $a0
 maskeqz $a0, $a2, $a0
@@ -813,6 +809,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 move $a1, $zero
 pcaddu18i $ra, %call36(memset)
 jirl $ra, $ra, 0
+ lu12i.w $ra, 8
 b .LBB0_22
 .LBB0_21: # %.preheader.preheader.i
 vrepli.b $vr0, 0
@@ -822,12 +819,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $a1, $s1, 0
 ext.w.h $a0, $a1
 slti $a0, $a0, 0
- lu12i.w $fp, 8
- xor $a2, $a1, $fp
+ xor $a2, $a1, $ra
 sltui $a2, $a2, 1
 sub.d $a3, $zero, $a1
 masknez $a3, $a3, $a2
- maskeqz $a2, $s4, $a2
+ maskeqz $a2, $s3, $a2
 or $a2, $a2, $a3
 maskeqz $a2, $a2, $a0
 masknez $a1, $a1, $a0
@@ -853,9 +849,9 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 .LBB0_26: # %.lr.ph.preheader
 move $s6, $zero
 ori $s7, $zero, 8
- ori $s3, $zero, 0
- lu32i.d $s3, 32768
- lu12i.w $fp, -8
+ ori $fp, $zero, 0
+ lu32i.d $fp, 32768
+ lu12i.w $s4, -8
 ori $s8, $zero, 7
 move $s0, $t3
 ori $a2, $zero, 1
@@ -865,16 +861,16 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 # in Loop: Header=BB0_28 Depth=1
 mul.d $a0, $a1, $a0
 slli.d $a0, $a0, 33
- add.d $a0, $a0, $s3
+ add.d $a0, $a0, $fp
 srai.d $a0, $a0, 48
 add.d $a0, $a0, $s1
- slt $a1, $a0, $s4
+ slt $a1, $a0, $s3
 maskeqz $a0, $a0, $a1
- masknez $a1, $s4, $a1
+ masknez $a1, $s3, $a1
 or $a0, $a0, $a1
- slt $a1, $fp, $a0
+ slt $a1, $s4, $a0
 maskeqz $a0, $a0, $a1
- masknez $a1, $fp, $a1
+ masknez $a1, $s4, $a1
 or $a1, $a0, $a1
 addi.d $a2, $s2, 1
 addi.d $s0, $s0, 2
@@ -882,12 +878,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ext.w.h $a0, $s5
 slti $a0, $a0, 0
 bstrpick.d $a3, $s5, 15, 0
- lu12i.w $a4, 8
- xor $a3, $a3, $a4
+ xor $a3, $a3, $ra
 sltui $a3, $a3, 1
 sub.d $a4, $zero, $s5
 masknez $a4, $a4, $a3
- maskeqz $a3, $s4, $a3
+ maskeqz $a3, $s3, $a3
 or $a3, $a3, $a4
 maskeqz $a3, $a3, $a0
 masknez $a0, $s5, $a0
@@ -905,6 +900,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 move $a1, $s1
 pcaddu18i $ra, %call36(gsm_div)
 jirl $ra, $ra, 0
+ lu12i.w $ra, 8
 ext.w.h $a1, $s5
 slt $a2, $zero, $a1
 sub.d $a3, $zero, $a0
@@ -933,29 +929,29 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.h $a6, $a4, 0
 ld.h $a7, $a5, 0
 mul.d $t0, $a3, $a6
- add.d $t0, $t0, $s3
+ add.d $t0, $t0, $fp
 srai.d $t0, $t0, 48
 add.d $t0, $t0, $a7
- slt $t1, $t0, $s4
+ slt $t1, $t0, $s3
 maskeqz $t0, $t0, $t1
- masknez $t1, $s4, $t1
+ masknez $t1, $s3, $t1
 or $t0, $t0, $t1
- slt $t1, $fp, $t0
+ slt $t1, $s4, $t0
 maskeqz $t0, $t0, $t1
- masknez $t1, $fp, $t1
+ masknez $t1, $s4, $t1
 or $t0, $t0, $t1
 st.h $t0, $a5, -2
 mul.d $a7, $a3, $a7
- add.d $a7, $a7, $s3
+ add.d $a7, $a7, $fp
 srai.d $a7, $a7, 48
 add.d $a6, $a7, $a6
- slt $a7, $a6, $s4
+ slt $a7, $a6, $s3
 maskeqz $a6, $a6, $a7
- masknez $a7, $s4, $a7
+ masknez $a7, $s3, $a7
 or $a6, $a6, $a7
- slt $a7, $fp, $a6
+ slt $a7, $s4, $a6
 maskeqz $a6, $a6, $a7
- masknez $a7, $fp, $a7
+ masknez $a7, $s4, $a7
 or $a6, $a6, $a7
 st.h $a6, $a4, 0
 addi.d $a2, $a2, -1
@@ -974,11 +970,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $a4, $s1, 2
 ext.w.h $a2, $a4
 slti $a2, $a2, 0
- xor $a5, $a4, $fp
+ xor $a5, $a4, $ra
 sltui $a5, $a5, 1
 sub.d $a6, $zero, $a4
 masknez $a6, $a6, $a5
- maskeqz $a5, $s4, $a5
+ maskeqz $a5, $s3, $a5
 or $a5, $a5, $a6
 maskeqz $a5, $a5, $a2
 masknez $a4, $a4, $a2
@@ -1006,11 +1002,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $a6, $s1, 4
 ext.w.h $a4, $a6
 slti $a4, $a4, 0
- xor $a7, $a6, $fp
+ xor $a7, $a6, $ra
 sltui $a7, $a7, 1
 sub.d $t0, $zero, $a6
 masknez $t0, $t0, $a7
- maskeqz $a7, $s4, $a7
+ maskeqz $a7, $s3, $a7
 or $a7, $a7, $t0
 maskeqz $a7, $a7, $a4
 masknez $a6, $a6, $a4
@@ -1037,11 +1033,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t0, $s1, 6
 ext.w.h $a7, $t0
 slti $a7, $a7, 0
- xor $t1, $t0, $fp
+ xor $t1, $t0, $ra
 sltui $t1, $t1, 1
 sub.d $t2, $zero, $t0
 masknez $t2, $t2, $t1
- maskeqz $t1, $s4, $t1
+ maskeqz $t1, $s3, $t1
 or $t1, $t1, $t2
 maskeqz $t1, $t1, $a7
 masknez $t0, $t0, $a7
@@ -1068,11 +1064,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t2, $s1, 8
 ext.w.h $t1, $t2
 slti $t1, $t1, 0
- xor $t3, $t2, $fp
+ xor $t3, $t2, $ra
 sltui $t3, $t3, 1
 sub.d $t4, $zero, $t2
 masknez $t4, $t4, $t3
- maskeqz $t3, $s4, $t3
+ maskeqz $t3, $s3, $t3
 or $t3, $t3, $t4
 maskeqz $t3, $t3, $t1
 masknez $t2, $t2, $t1
@@ -1099,11 +1095,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t4, $s1, 10
 ext.w.h $t3, $t4
 slti $t3, $t3, 0
- xor $t5, $t4, $fp
+ xor $t5, $t4, $ra
 sltui $t5, $t5, 1
 sub.d $t6, $zero, $t4
 masknez $t6, $t6, $t5
- maskeqz $t5, $s4, $t5
+ maskeqz $t5, $s3, $t5
 or $t5, $t5, $t6
 maskeqz $t5, $t5, $t3
 masknez $t4, $t4, $t3
@@ -1130,11 +1126,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t6, $s1, 12
 ext.w.h $t5, $t6
 slti $t5, $t5, 0
- xor $t7, $t6, $fp
+ xor $t7, $t6, $ra
 sltui $t7, $t7, 1
 sub.d $t8, $zero, $t6
 masknez $t8, $t8, $t7
- maskeqz $t7, $s4, $t7
+ maskeqz $t7, $s3, $t7
 or $t7, $t7, $t8
 maskeqz $t7, $t7, $t5
 masknez $t6, $t6, $t5
@@ -1161,11 +1157,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t8, $s1, 14
 ext.w.h $t6, $t8
 slti $t6, $t6, 0
- xor $fp, $t8, $fp
+ xor $fp, $t8, $ra
 sltui $fp, $fp, 1
 sub.d $s0, $zero, $t8
 masknez $s0, $s0, $fp
- maskeqz $fp, $s4, $fp
+ maskeqz $fp, $s3, $fp
 or $fp, $fp, $s0
 maskeqz $fp, $fp, $t6
 masknez $t8, $t8, $t6
diff --git a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/build.s b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/build.s
index 6ce411ff..0093f57f 100644
--- a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/build.s
+++ b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/build.s
@@ -155,12 +155,12 @@ build_branch: # @build_branch
 # %bb.0:
 beqz $a2, .LBB2_2
 # %bb.1:
- addi.d $sp, $sp, -80
- st.d $ra, $sp, 72 # 8-byte Folded Spill
- st.d $fp, $sp, 64 # 8-byte Folded Spill
- st.d $s0, $sp, 56 # 8-byte Folded Spill
- st.d $s1, $sp, 48 # 8-byte Folded Spill
- st.d $s2, $sp, 40 # 8-byte Folded Spill
+ addi.d $sp, $sp, -48
+ st.d $ra, $sp, 40 # 8-byte Folded Spill
+ st.d $fp, $sp, 32 # 8-byte Folded Spill
+ st.d $s0, $sp, 24 # 8-byte Folded Spill
+ st.d $s1, $sp, 16 # 8-byte Folded Spill
+ st.d $s2, $sp, 8 # 8-byte Folded Spill
 move $s2, $a2
 move $s0, $a1
 move $s1, $a0
@@ -177,69 +177,67 @@ build_branch: # @build_branch
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- lu52i.d $a1, $zero, 1023
- vreplgr2vr.d $vr0, $a1
- vst $vr0, $sp, 16 # 16-byte Folded Spill
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 56
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 64
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 72
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 80
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 88
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 96
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 104
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 112
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 120
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 128
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 st.d $a0, $fp, 136
 ori $a0, $zero, 32
@@ -249,18 +247,18 @@ build_branch: # @build_branch
 vld $vr0, $a1, %pc_lo12(.LCPI2_0)
 move $a1, $a0
 move $a0, $fp
- vld $vr1, $sp, 16 # 16-byte Folded Reload
+ vldi $vr1, -912
 vst $vr1, $a1, 0
 st.d $a1, $fp, 144
 vst $vr0, $fp, 32
 vrepli.b $vr0, 0
 vst $vr0, $fp, 16
- ld.d $s2, $sp, 40 # 8-byte Folded Reload
- ld.d $s1, $sp, 48 # 8-byte Folded Reload
- ld.d $s0, $sp, 56 # 8-byte Folded Reload
- ld.d $fp, $sp, 64 # 8-byte Folded Reload
- ld.d $ra, $sp, 72 # 8-byte Folded Reload
- addi.d $sp, $sp, 80
+ ld.d $s2, $sp, 8 # 8-byte Folded Reload
+ ld.d $s1, $sp, 16 # 8-byte Folded Reload
+ ld.d $s0, $sp, 24 # 8-byte Folded Reload
+ ld.d $fp, $sp, 32 # 8-byte Folded Reload
+ ld.d $ra, $sp, 40 # 8-byte Folded Reload
+ addi.d $sp, $sp, 48
 ret
 .LBB2_2:
 move $a0, $zero
@@ -278,8 +276,7 @@ build_leaf: # @build_leaf
 ori $a0, $zero, 32
 pcaddu18i $ra, %call36(malloc)
 jirl $ra, $ra, 0
- lu52i.d $a1, $zero, 1023
- vreplgr2vr.d $vr0, $a1
+ vldi $vr0, -912
 vst $vr0, $a0, 0
 ld.d $ra, $sp, 8 # 8-byte Folded Reload
 addi.d $sp, $sp, 16
diff --git a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s
index a9a4141f..0830e537 100644
--- a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s
+++ b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s
@@ -1226,8 +1226,7 @@ find_dd_grad_f: # @find_dd_grad_f
 # kill: def $f1_64 killed $f1_64 def $vr1
 # kill: def $f0_64 killed $f0_64 def $vr0
 vextrins.d $vr2, $vr3, 16
- lu52i.d $a1, $zero, 1023
- vreplgr2vr.d $vr3, $a1
+ vldi $vr3, -912
 vfadd.d $vr2, $vr2, $vr3
 vfrecip.d $vr2, $vr2
 vextrins.d $vr0, $vr1, 16
diff --git a/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s b/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s
index 5f56a87c..7ad2b72c 100644
--- a/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s
+++ b/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s
@@ -677,30 +677,30 @@ _Z5trainPsS_ii: # @_Z5trainPsS_ii
 # %bb.0:
 blez $a2, .LBB13_7
 # %bb.1: # %.lr.ph.preheader
- addi.w $a6, $a2, 7
- move $a2, $a6
+ addi.w $a5, $a2, 7
+ move $a2, $a5
 bstrins.d $a2, $zero, 2, 0
 ori $a4, $zero, 1
- slt $a5, $a4, $a2
- masknez $a4, $a4, $a5
- maskeqz $a2, $a2, $a5
+ slt $a6, $a4, $a2
+ masknez $a4, $a4, $a6
+ maskeqz $a2, $a2, $a6
 or $a4, $a2, $a4
- ori $a7, $zero, 8
+ ori $a6, $zero, 8
 lu12i.w $a2, -8
- lu12i.w $a5, 7
- blt $a6, $a7, .LBB13_4
+ blt $a5, $a6, .LBB13_4
 # %bb.2: # %vector.memcheck
- alsl.d $a6, $a4, $a0, 1
- bgeu $a1, $a6, .LBB13_8
+ alsl.d $a5, $a4, $a0, 1
+ bgeu $a1, $a5, .LBB13_8
 # %bb.3: # %vector.memcheck
- alsl.d $a6, $a4, $a1, 1
- bgeu $a0, $a6, .LBB13_8
+ alsl.d $a5, $a4, $a1, 1
+ bgeu $a0, $a5, .LBB13_8
 .LBB13_4:
- move $a6, $zero
+ move $a5, $zero
 .LBB13_5: # %.lr.ph.preheader17
- alsl.d $a0, $a6, $a0, 1
- alsl.d $a1, $a6, $a1, 1
- sub.d $a4, $a6, $a4
+ alsl.d $a0, $a5, $a0, 1
+ alsl.d $a1, $a5, $a1, 1
+ sub.d $a4, $a5, $a4
+ lu12i.w $a5, 7
 ori $a5, $a5, 4095
 .p2align 4, , 16
 .LBB13_6: # %.lr.ph
@@ -728,25 +728,24 @@ _Z5trainPsS_ii: # @_Z5trainPsS_ii
 .LBB13_7: # %._crit_edge
 ret
 .LBB13_8: # %vector.ph
- bstrpick.d $a6, $a4, 30, 3
- slli.d $a6, $a6, 3
+ bstrpick.d $a5, $a4, 30, 3
+ slli.d $a5, $a5, 3
 vreplgr2vr.w $vr0, $a3
 vreplgr2vr.w $vr1, $a2
- ori $a7, $a5, 4095
- vreplgr2vr.w $vr2, $a7
- move $a7, $a1
- move $t0, $a0
- move $t1, $a6
+ vldi $vr2, -2433
+ move $a6, $a1
+ move $a7, $a0
+ move $t0, $a5
 .p2align 4, , 16
 .LBB13_9: # %vector.body
 # =>This Inner Loop Header: Depth=1
- ld.d $t2, $a7, 0
- vinsgr2vr.d $vr3, $t2, 0
- ld.d $t2, $t0, 0
+ ld.d $t1, $a6, 0
+ vinsgr2vr.d $vr3, $t1, 0
+ ld.d $t1, $a7, 0
 vilvl.h $vr3, $vr3, $vr3
 vslli.w $vr3, $vr3, 16
 vsrai.w $vr3, $vr3, 16
- vinsgr2vr.d $vr4, $t2, 0
+ vinsgr2vr.d $vr4, $t1, 0
 vilvl.h $vr4, $vr4, $vr4
 vslli.w $vr4, $vr4, 16
 vsrai.w $vr4, $vr4, 16
@@ -758,13 +757,13 @@ _Z5trainPsS_ii: # @_Z5trainPsS_ii
 vmax.w $vr3, $vr3, $vr1
 vmin.w $vr3, $vr3, $vr2
 vpickev.h $vr3, $vr3, $vr3
- vstelm.d $vr3, $a7, 0, 0
- addi.d $t1, $t1, -4
- addi.d $t0, $t0, 8
+ vstelm.d $vr3, $a6, 0, 0
+ addi.d $t0, $t0, -4
 addi.d $a7, $a7, 8
- bnez $t1, .LBB13_9
+ addi.d $a6, $a6, 8
+ bnez $t0, .LBB13_9
 # %bb.10: # %middle.block
- beq $a6, $a4, .LBB13_7
+ beq $a5, $a4, .LBB13_7
 b .LBB13_5
 .Lfunc_end13:
 .size _Z5trainPsS_ii, .Lfunc_end13-_Z5trainPsS_ii
@@ -1079,8 +1078,7 @@ _ZN5MixerC2Eiiii: # @_ZN5MixerC2Eiiii
 bstrpick.d $a1, $s0, 30, 4
 slli.d $a1, $a1, 4
 addi.d $a2, $a0, 32
- ori $a3, $zero, 2048
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -3832
 move $a3, $a1
 .p2align 4, , 16
 .LBB17_22: # %vector.body
@@ -1101,8 +1099,7 @@ _ZN5MixerC2Eiiii: # @_ZN5MixerC2Eiiii
 slli.d $a1, $a1, 2
 sub.d $a2, $a3, $a1
 alsl.d $a3, $a3, $a0, 2
- ori $a4, $zero, 2048
- vreplgr2vr.w $vr0, $a4
+ vldi $vr0, -3832
 .p2align 4, , 16
 .LBB17_26: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
@@ -1567,68 +1564,68 @@ _ZN8StateMapC2Ei: # @_ZN8StateMapC2Ei
 st.d $a0, $s0, 16
 beqz $a0, .LBB20_19
 # %bb.4: # %iter.check
+ ori $a1, $zero, 3
 st.d $a0, $s0, 24
- ori $a2, $zero, 3
- lu12i.w $a1, -524288
- bltu $a2, $fp, .LBB20_7
+ bltu $a1, $fp, .LBB20_7
 # %bb.5:
- move $a2, $zero
+ move $a1, $zero
 b .LBB20_16
 .LBB20_6: # %_ZN5ArrayIjLi0EEC2Ei.exit
 vrepli.b $vr0, 0
 vst $vr0, $s0, 16
 b .LBB20_18
 .LBB20_7: # %vector.main.loop.iter.check
- ori $a2, $zero, 16
- bgeu $fp, $a2, .LBB20_9
+ ori $a1, $zero, 16
+ bgeu $fp, $a1, .LBB20_9
 # %bb.8:
- move $a2, $zero
+ move $a1, $zero
 b .LBB20_13
 .LBB20_9: # %vector.ph
- bstrpick.d $a2, $fp, 30, 4
- slli.d $a2, $a2, 4
- addi.d $a3, $a0, 32
- xvreplgr2vr.w $xr0, $a1
- move $a4, $a2
+ bstrpick.d $a1, $fp, 30, 4
+ slli.d $a1, $a1, 4
+ addi.d $a2, $a0, 32
+ xvldi $xr0, -3200
+ move $a3, $a1
 .p2align 4, , 16
 .LBB20_10: # %vector.body
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB20_10
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB20_10
 # %bb.11: # %middle.block
- beq $a2, $fp, .LBB20_18
+ beq $a1, $fp, .LBB20_18
 # %bb.12: # %vec.epilog.iter.check
- andi $a3, $fp, 12
- beqz $a3, .LBB20_16
+ andi $a2, $fp, 12
+ beqz $a2, .LBB20_16
 .LBB20_13: # %vec.epilog.ph
- move $a4, $a2
- bstrpick.d $a2, $fp, 30, 2
- slli.d $a2, $a2, 2
- sub.d $a3, $a4, $a2
- alsl.d $a4, $a4, $a0, 2
- vreplgr2vr.w $vr0, $a1
+ move $a3, $a1
+ bstrpick.d $a1, $fp, 30, 2
+ slli.d $a1, $a1, 2
+ sub.d $a2, $a3, $a1
+ alsl.d $a3, $a3, $a0, 2
+ vldi $vr0, -3200
 .p2align 4, , 16
 .LBB20_14: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
- vst $vr0, $a4, 0
- addi.d $a3, $a3, 4
- addi.d $a4, $a4, 16
- bnez $a3, .LBB20_14
+ vst $vr0, $a3, 0
+ addi.d $a2, $a2, 4
+ addi.d $a3, $a3, 16
+ bnez $a2, .LBB20_14
 # %bb.15: # %vec.epilog.middle.block
- beq $a2, $fp, .LBB20_18
+ beq $a1, $fp, .LBB20_18
 .LBB20_16: # %vec.epilog.scalar.ph.preheader
- alsl.d $a0, $a2, $a0, 2
- lu32i.d $a1, 0
+ alsl.d $a0, $a1, $a0, 2
+ lu12i.w $a2, -524288
+ lu32i.d $a2, 0
 .p2align 4, , 16
 .LBB20_17: # %vec.epilog.scalar.ph
 # =>This Inner Loop Header: Depth=1
- st.w $a1, $a0, 0
- addi.d $a2, $a2, 1
+ st.w $a2, $a0, 0
+ addi.d $a1, $a1, 1
 addi.d $a0, $a0, 4
- bltu $a2, $fp, .LBB20_17
+ bltu $a1, $fp, .LBB20_17
 .LBB20_18: # %._crit_edge
 ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -1781,63 +1778,62 @@ _ZN3APMC2Ei: # @_ZN3APMC2Ei
 st.d $a0, $fp, 16
 beqz $a0, .LBB23_18
 # %bb.4: # %iter.check
- alsl.w $a3, $s0, $s1, 3
+ alsl.w $a2, $s0, $s1, 3
 st.d $a0, $fp, 24
- bstrpick.d $a1, $a3, 31, 0
- ori $a4, $zero, 16
- lu12i.w $a2, -524288
- bgeu $a3, $a4, .LBB23_9
+ bstrpick.d $a1, $a2, 31, 0
+ ori $a3, $zero, 16
+ xvldi $xr0, -3200
+ bgeu $a2, $a3, .LBB23_9
 # %bb.5:
- move $a3, $zero
+ move $a2, $zero
 .LBB23_6: # %vec.epilog.vector.body.preheader
- sub.d $a4, $a1, $a3
- alsl.d $a3, $a3, $a0, 2
- xvreplgr2vr.w $xr0, $a2
+ sub.d $a3, $a1, $a2
+ alsl.d $a2, $a2, $a0, 2
 .p2align 4, , 16
 .LBB23_7: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 32
- bnez $a4, .LBB23_7
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 32
+ bnez $a3, .LBB23_7
 b .LBB23_15
 .LBB23_8: # %_ZN8StateMapC2Ei.exit
 vrepli.b $vr0, 0
 vst $vr0, $fp, 16
 b .LBB23_17
 .LBB23_9: # %vector.ph
- bstrpick.d $a3, $a1, 30, 4
- slli.d $a3, $a3, 4
- addi.d $a4, $a0, 32
- xvreplgr2vr.w $xr0, $a2
- move $a5, $a3
+ bstrpick.d $a2, $a1, 30, 4
+ slli.d $a2, $a2, 4
+ addi.d $a3, $a0, 32
+ move $a4, $a2
 .p2align 4, , 16
 .LBB23_10: # %vector.body
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a4, -32
- xvst $xr0, $a4, 0
- addi.d $a5, $a5, -16
- addi.d $a4, $a4, 64
- bnez $a5, .LBB23_10
+ xvst $xr0, $a3, -32
+ xvst $xr0, $a3, 0
+ addi.d $a4, $a4, -16
+ addi.d $a3, $a3, 64
+ bnez $a4, .LBB23_10
 # %bb.11: # %middle.block
- beq $a1, $a3, .LBB23_15
+ beq $a1, $a2, .LBB23_15
 # %bb.12: # %vec.epilog.iter.check
- andi $a4, $a1, 8
- bnez $a4, .LBB23_6
+ andi $a3, $a1, 8
+ bnez $a3, .LBB23_6
 # %bb.13: # %vec.epilog.scalar.ph.preheader
- sub.d $a3, $a1, $a3
- slli.d $a4, $a1, 2
- bstrpick.d $a4, $a4, 32, 6
- slli.d $a4, $a4, 6
- add.d $a4, $a0, $a4
- lu32i.d $a2, 0
+ sub.d $a2, $a1, $a2
+ slli.d $a3, $a1, 2
+ bstrpick.d $a3, $a3, 32, 6
+ slli.d $a3, $a3, 6
+ add.d $a3, $a0, $a3
+ lu12i.w $a4, -524288
+ lu32i.d $a4, 0
 .p2align 4, , 16
 .LBB23_14: # %vec.epilog.scalar.ph
 # =>This Inner Loop Header: Depth=1
- st.w $a2, $a4, 0
- addi.d $a3, $a3, -1
- addi.d $a4, $a4, 4
- bnez $a3, .LBB23_14
+ st.w $a4, $a3, 0
+ addi.d $a2, $a2, -1
+ addi.d $a3, $a3, 4
+ bnez $a2, .LBB23_14
 .LBB23_15: # %vector.body20.preheader
 pcalau12i $a2, %pc_hi20(.LCPI23_0)
 xvld $xr0, $a2, %pc_lo12(.LCPI23_0)
@@ -1845,8 +1841,7 @@ _ZN3APMC2Ei: # @_ZN3APMC2Ei
 ori $a2, $a2, 2731
 xvreplgr2vr.w $xr1, $a2
 xvrepli.w $xr2, 24
- lu12i.w $a2, 128
- xvreplgr2vr.w $xr3, $a2
+ xvldi $xr3, -3576
 lu12i.w $a2, 21845
 ori $a2, $a2, 1366
 xvreplgr2vr.w $xr4, $a2
@@ -2160,8 +2155,7 @@ _ZN10ContextMapC2Eii: # @_ZN10ContextMapC2Eii
 vld $vr0, $a0, %pc_lo12(.LCPI24_0)
 vst $vr0, $sp, 112 # 16-byte Folded Spill
 move $s4, $zero
- lu12i.w $a0, -524288
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3200
 xvst $xr0, $sp, 80 # 32-byte Folded Spill
 .p2align 4, , 16
 .LBB24_23: # =>This Inner Loop Header: Depth=1
@@ -3766,64 +3760,64 @@ _ZN25SmallStationaryContextMapC2Ei: # @_ZN25SmallStationaryContextMapC2Ei
 st.d $a0, $fp, 8
 beqz $a0, .LBB32_18
 # %bb.5: # %iter.check
- move $a3, $zero
+ move $a2, $zero
 st.d $a0, $fp, 16
 st.w $zero, $fp, 24
- bstrpick.d $a2, $s0, 31, 0
- ori $a4, $zero, 7
- lu12i.w $a1, 8
- bgeu $a4, $s0, .LBB32_15
+ ori $a3, $zero, 7
+ bstrpick.d $a1, $s0, 31, 0
+ bgeu $a3, $s0, .LBB32_15
 # %bb.6: # %vector.main.loop.iter.check
- ori $a3, $zero, 32
- bgeu $s0, $a3, .LBB32_8
+ ori $a2, $zero, 32
+ bgeu $s0, $a2, .LBB32_8
 # %bb.7:
- move $a3, $zero
+ move $a2, $zero
 b .LBB32_12
 .LBB32_8: # %vector.ph
- bstrpick.d $a3, $a2, 29, 5
- slli.d $a3, $a3, 5
- addi.d $a4, $a0, 32
- xvreplgr2vr.h $xr0, $a1
- move $a5, $a3
+ bstrpick.d $a2, $a1, 29, 5
+ slli.d $a2, $a2, 5
+ addi.d $a3, $a0, 32
+ xvldi $xr0, -2688
+ move $a4, $a2
 .p2align 4, , 16
 .LBB32_9: # %vector.body
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a4, -32
- xvst $xr0, $a4, 0
- addi.d $a5, $a5, -32
- addi.d $a4, $a4, 64
- bnez $a5, .LBB32_9
+ xvst $xr0, $a3, -32
+ xvst $xr0, $a3, 0
+ addi.d $a4, $a4, -32
+ addi.d $a3, $a3, 64
+ bnez $a4, .LBB32_9
 # %bb.10: # %middle.block
- beq $a3, $a2, .LBB32_17
+ beq $a2, $a1, .LBB32_17
 # %bb.11: # %vec.epilog.iter.check
- andi $a4, $a2, 24
- beqz $a4, .LBB32_15
+ andi $a3, $a1, 24
+ beqz $a3, .LBB32_15
 .LBB32_12: # %vec.epilog.ph
- move $a5, $a3
- bstrpick.d $a3, $a2, 29, 3
- slli.d $a3, $a3, 3
- sub.d $a4, $a5, $a3
- slli.d $a5, $a5, 1
- vreplgr2vr.h $vr0, $a1
+ move $a4, $a2
+ bstrpick.d $a2, $a1, 29, 3
+ slli.d $a2, $a2, 3
+ sub.d $a3, $a4, $a2
+ slli.d $a4, $a4, 1
+ vldi $vr0, -2688
 .p2align 4, , 16
 .LBB32_13: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
- vstx $vr0, $a0, $a5
- addi.d $a4, $a4, 8
- addi.d $a5, $a5, 16
- bnez $a4, .LBB32_13
+ vstx $vr0, $a0, $a4
+ addi.d $a3, $a3, 8
+ addi.d $a4, $a4, 16
+ bnez $a3, .LBB32_13
 # %bb.14: # %vec.epilog.middle.block
- beq $a3, $a2, .LBB32_17
+ beq $a2, $a1, .LBB32_17
 .LBB32_15: # %vec.epilog.scalar.ph.preheader
- slli.d $a4, $a3, 1
- sub.d $a2, $a2, $a3
+ slli.d $a3, $a2, 1
+ sub.d $a1, $a1, $a2
+ lu12i.w $a2, 8
 .p2align 4, , 16
 .LBB32_16: # %vec.epilog.scalar.ph
 # =>This Inner Loop Header: Depth=1
- stx.h $a1, $a0, $a4
- addi.d $a2, $a2, -1
- addi.d $a4, $a4, 2
- bnez $a2, .LBB32_16
+ stx.h $a2, $a0, $a3
+ addi.d $a1, $a1, -1
+ addi.d $a3, $a3, 2
+ bnez $a1, .LBB32_16
 .LBB32_17: # %._crit_edge
 st.d $a0, $fp, 32
 ld.d $s0, $sp, 8 # 8-byte Folded Reload
@@ -16307,6 +16301,7 @@ _ZN5Mixer6updateEv: # @_ZN5Mixer6updateEv
 lu12i.w $fp, 7
 ori $fp, $fp, 4095
 vreplgr2vr.w $vr0, $t8
+ vldi $vr1, -2433
 b .LBB57_4
 .p2align 4, , 16
 .LBB57_3: # %_Z5trainPsS_ii.exit.us
@@ -16372,7 +16367,7 @@ _ZN5Mixer6updateEv: # @_ZN5Mixer6updateEv
 b .LBB57_3
 .LBB57_11: # %vector.ph
 # in Loop: Header=BB57_4 Depth=1
- vreplgr2vr.w $vr1, $s0
+ vreplgr2vr.w $vr2, $s0
 move $s3, $a5
 move $s4, $t5
 .p2align 4, , 16
@@ -16380,25 +16375,24 @@ _ZN5Mixer6updateEv: # @_ZN5Mixer6updateEv
 # Parent Loop BB57_4 Depth=1
 # => This Inner Loop Header: Depth=2
 ld.d $s5, $s2, 0
- vinsgr2vr.d $vr2, $s5, 0
- ld.d $s5, $s3, 0
- vilvl.h $vr2, $vr2, $vr2
- vslli.w $vr2, $vr2, 16
- vsrai.w $vr2, $vr2, 16
 vinsgr2vr.d $vr3, $s5, 0
+ ld.d $s5, $s3, 0
 vilvl.h $vr3, $vr3, $vr3
 vslli.w $vr3, $vr3, 16
 vsrai.w $vr3, $vr3, 16
- vmul.w $vr3, $vr1, $vr3
- vsrai.w $vr3, $vr3, 15
- vaddi.wu $vr3, $vr3, 1
- vsrai.w $vr3, $vr3, 1
- vadd.w $vr2, $vr3, $vr2
- vmax.w $vr2, $vr2, $vr0
- vreplgr2vr.w $vr3, $fp
- vmin.w $vr2, $vr2, $vr3
- vpickev.h $vr2, $vr2, $vr2
- vstelm.d $vr2, $s2, 0, 0
+ vinsgr2vr.d $vr4, $s5, 0
+ vilvl.h $vr4, $vr4, $vr4
+ vslli.w $vr4, $vr4, 16
+ vsrai.w $vr4, $vr4, 16
+ vmul.w $vr4, $vr2, $vr4
+ vsrai.w $vr4, $vr4, 15
+ vaddi.wu $vr4, $vr4, 1
+ vsrai.w $vr4, $vr4, 1
+ vadd.w $vr3, $vr4, $vr3
+ vmax.w $vr3, $vr3, $vr0
+ vmin.w $vr3, $vr3, $vr1
+ vpickev.h $vr3, $vr3, $vr3
+ vstelm.d $vr3, $s2, 0, 0
 addi.d $s4, $s4, -4
 addi.d $s3, $s3, 8
 addi.d $s2, $s2, 8
@@ -16630,6 +16624,7 @@ _ZN5Mixer1pEv: # @_ZN5Mixer1pEv
 lu12i.w $a0, 7
 ori $s5, $a0, 4095
 vreplgr2vr.w $vr3, $s4
+ vldi $vr4, -2433
 ori $s6, $zero, 2
 vst $vr3, $sp, 16 # 16-byte Folded Spill
 b .LBB59_3
@@ -16684,6 +16679,7 @@ _ZN5Mixer1pEv: # @_ZN5Mixer1pEv
 # in Loop: Header=BB59_3 Depth=1
 st.w $a2, $fp, 96
 vld $vr3, $sp, 16 # 16-byte Folded Reload
+ vldi $vr4, -2433
 b .LBB59_8
 .p2align 4, , 16
 .LBB59_7: # in Loop: Header=BB59_3 Depth=1
@@ -16816,8 +16812,7 @@ _ZN5Mixer1pEv: # @_ZN5Mixer1pEv
 vsrai.w $vr2, $vr2, 1
 vadd.w $vr1, $vr2, $vr1
 vmax.w $vr1, $vr1, $vr3
- vreplgr2vr.w $vr2, $s5
- vmin.w $vr1, $vr1, $vr2
+ vmin.w $vr1, $vr1, $vr4
 vpickev.h $vr1, $vr1, $vr1
 vstelm.d $vr1, $s1, 0, 0
 addi.d $s8, $s8, -4
@@ -17588,13 +17583,13 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer
 lu12i.w $a1, 4
 ori $a1, $a1, 3832
 add.d $a1, $sp, $a1
- lu52i.d $a2, $zero, 1023
- ori $a3, $zero, 7
- bltu $a3, $s7, .LBB61_133
+ ori $a2, $zero, 7
+ lu52i.d $a3, $zero, 1023
+ bltu $a2, $s7, .LBB61_133
 .LBB61_39: # %.preheader510.us
 # Parent Loop BB61_18 Depth=1
 # => This Inner Loop Header: Depth=2
- st.d $a2, $a1, 0
+ st.d $a3, $a1, 0
 addi.d $a0, $a0, -1
 addi.d $a1, $a1, 8
 bnez $a0, .LBB61_39
@@ -19964,7 +19959,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer
 lu12i.w $a0, 4
 ori $a0, $a0, 3864
 add.d $a0, $sp, $a0
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
 .p2align 4, , 16
 .LBB61_134: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -21725,6 +21720,7 @@ _Z13contextModel2v: # @_Z13contextModel2v
 lu12i.w $t8, 7
 ori $t8, $t8, 4095
 vreplgr2vr.w $vr0, $t7
+ vldi $vr1, -2433
 b .LBB69_10
 .p2align 4, , 16
 .LBB69_9: # %_Z5trainPsS_ii.exit.us.i
@@ -21790,7 +21786,7 @@ _Z13contextModel2v: # @_Z13contextModel2v
 b .LBB69_9
 .LBB69_17: # %vector.ph
 # in Loop: Header=BB69_10 Depth=1
- vreplgr2vr.w $vr1, $s0
+ vreplgr2vr.w $vr2, $s0
 move $s3, $a0
 move $s4, $t4
 .p2align 4, , 16
@@ -21798,25 +21794,24 @@ _Z13contextModel2v: # @_Z13contextModel2v
 # Parent Loop BB69_10 Depth=1
 # => This Inner Loop Header: Depth=2
 ld.d $s7, $s2, 0
- vinsgr2vr.d $vr2, $s7, 0
- ld.d $s7, $s3, 0
- vilvl.h $vr2, $vr2, $vr2
- vslli.w $vr2, $vr2, 16
- vsrai.w $vr2, $vr2, 16
 vinsgr2vr.d $vr3, $s7, 0
+ ld.d $s7, $s3, 0
 vilvl.h $vr3, $vr3, $vr3
 vslli.w $vr3, $vr3, 16
 vsrai.w $vr3, $vr3, 16
- vmul.w $vr3, $vr1, $vr3
- vsrai.w $vr3, $vr3, 15
- vaddi.wu $vr3, $vr3, 1
- vsrai.w $vr3, $vr3, 1
- vadd.w $vr2, $vr3, $vr2
- vmax.w $vr2, $vr2, $vr0
- vreplgr2vr.w $vr3, $t8
- vmin.w $vr2, $vr2, $vr3
- vpickev.h $vr2, $vr2, $vr2
- vstelm.d $vr2, $s2, 0, 0
+ vinsgr2vr.d $vr4, $s7, 0
+ vilvl.h $vr4, $vr4, $vr4
+ vslli.w $vr4, $vr4, 16
+ vsrai.w $vr4, $vr4, 16
+ vmul.w $vr4, $vr2, $vr4
+ vsrai.w $vr4, $vr4, 15
+ vaddi.wu $vr4, $vr4, 1
+ vsrai.w $vr4, $vr4, 1
+ vadd.w $vr3, $vr4, $vr3
+ vmax.w $vr3, $vr3, $vr0
+ vmin.w $vr3, $vr3, $vr1
+ vpickev.h $vr3, $vr3, $vr3
+ vstelm.d $vr3, $s2, 0, 0
 addi.d $s4, $s4, -4
 addi.d $s3, $s3, 8
 addi.d $s2, $s2, 8
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s
index 8273327f..743031b3 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s
@@ -312,14 +312,13 @@ Vside: # @Vside
 move $a2, $a5
 bstrins.d $a2, $a7, 1, 0
 xvreplve0.d $xr1, $xr0
- vreplgr2vr.w $vr2, $fp
 pcalau12i $t0, %pc_hi20(.LCPI0_1)
- vld $vr3, $t0, %pc_lo12(.LCPI0_1)
+ vld $vr2, $t0, %pc_lo12(.LCPI0_1)
 pcalau12i $t0, %pc_hi20(.LCPI0_2)
- xvld $xr4, $t0, %pc_lo12(.LCPI0_2)
+ xvld $xr3, $t0, %pc_lo12(.LCPI0_2)
+ vreplgr2vr.w $vr4, $fp
 addi.d $t0, $a3, 48
- lu52i.d $t1, $zero, 1022
- xvreplgr2vr.d $xr5, $t1
+ xvldi $xr5, -928
 move $t1, $a6
 .p2align 4, , 16
 .LBB0_24: # %vector.body200
@@ -328,19 +327,19 @@ Vside: # @Vside
 st.w $s0, $t0, 4
 st.w $s0, $t0, 24
 st.w $s0, $t0, 44
- xvpickve2gr.d $t2, $xr4, 0
+ xvpickve2gr.d $t2, $xr3, 0
 vinsgr2vr.w $vr6, $t2, 0
- xvpickve2gr.d $t2, $xr4, 1
+ xvpickve2gr.d $t2, $xr3, 1
 vinsgr2vr.w $vr6, $t2, 1
- xvpickve2gr.d $t2, $xr4, 2
+ xvpickve2gr.d $t2, $xr3, 2
 vinsgr2vr.w $vr6, $t2, 2
- xvpickve2gr.d $t2, $xr4, 3
+ xvpickve2gr.d $t2, $xr3, 3
 vinsgr2vr.w $vr6, $t2, 3
 vaddi.wu $vr6, $vr6, 1
 vext2xv.du.wu $xr6, $xr6
 xvffint.d.lu $xr6, $xr6
 xvfmul.d $xr6, $xr1, $xr6
- vext2xv.du.wu $xr7, $xr3
+ vext2xv.du.wu $xr7, $xr2
 xvffint.d.lu $xr7, $xr7
 xvfmul.d $xr7, $xr1, $xr7
 xvftintrz.l.d $xr8, $xr7
@@ -359,7 +358,7 @@ Vside: # @Vside
 xvpickve2gr.d $t2, $xr9, 3
 vinsgr2vr.w $vr10, $t2, 3
 vsub.w $vr8, $vr8, $vr10
- vadd.w $vr9, $vr8, $vr2
+ vadd.w $vr9, $vr8, $vr4
 vstelm.w $vr9, $t0, -12, 0
 vstelm.w $vr9, $t0, 8, 1
 vstelm.w $vr9, $t0, 28, 2
@@ -389,8 +388,8 @@ Vside: # @Vside
 st.w $a7, $t0, -4
 st.w $a7, $t0, 16
 st.w $a7, $t0, 36
- xvaddi.du $xr4, $xr4, 4
- vaddi.wu $vr3, $vr3, 4
+ xvaddi.du $xr3, $xr3, 4
+ vaddi.wu $vr2, $vr2, 4
 addi.d $t1, $t1, -4
 addi.d $t0, $t0, 80
 bnez $t1, .LBB0_24
@@ -460,14 +459,13 @@ Vside: # @Vside
 move $a2, $a5
 bstrins.d $a2, $a7, 1, 0
 xvreplve0.d $xr1, $xr0
- vreplgr2vr.w $vr2, $fp
 pcalau12i $t0, %pc_hi20(.LCPI0_1)
- vld $vr3, $t0, %pc_lo12(.LCPI0_1)
+ vld $vr2, $t0, %pc_lo12(.LCPI0_1)
 pcalau12i $t0, %pc_hi20(.LCPI0_2)
- xvld $xr4, $t0, %pc_lo12(.LCPI0_2)
+ xvld $xr3, $t0, %pc_lo12(.LCPI0_2)
+ vreplgr2vr.w $vr4, $fp
 addi.d $t0, $a3, 48
- lu52i.d $t1, $zero, 1022
- xvreplgr2vr.d $xr5, $t1
+ xvldi $xr5, -928
 move $t1, $a6
 .p2align 4, , 16
 .LBB0_31: # %vector.body182
@@ -476,19 +474,19 @@ Vside: # @Vside
 st.w $s0, $t0, 4
 st.w $s0, $t0, 24
 st.w $s0, $t0, 44
- xvpickve2gr.d $t2, $xr4, 0
+ xvpickve2gr.d $t2, $xr3, 0
 vinsgr2vr.w $vr6, $t2, 0
- xvpickve2gr.d $t2, $xr4, 1
+ xvpickve2gr.d $t2, $xr3, 1
 vinsgr2vr.w $vr6, $t2, 1
- xvpickve2gr.d $t2, $xr4, 2
+ xvpickve2gr.d $t2, $xr3, 2
 vinsgr2vr.w $vr6, $t2, 2
- xvpickve2gr.d $t2, $xr4, 3
+ xvpickve2gr.d $t2, $xr3, 3
 vinsgr2vr.w $vr6, $t2, 3
 vaddi.wu $vr6, $vr6, 1
 vext2xv.du.wu $xr6, $xr6
 xvffint.d.lu $xr6, $xr6
 xvfmul.d $xr6, $xr1, $xr6
- vext2xv.du.wu $xr7, $xr3
+ vext2xv.du.wu $xr7, $xr2
 xvffint.d.lu $xr7, $xr7
 xvfmul.d $xr7, $xr1, $xr7
 xvftintrz.l.d $xr8, $xr7
@@ -507,7 +505,7 @@ Vside: # @Vside
 xvpickve2gr.d $t2, $xr9, 3
 vinsgr2vr.w $vr10, $t2, 3
 vsub.w $vr8, $vr8, $vr10
- vsub.w $vr9, $vr2, $vr8
+ vsub.w $vr9, $vr4, $vr8
 vstelm.w $vr9, $t0, -12, 0
 vstelm.w $vr9, $t0, 8, 1
 vstelm.w $vr9, $t0, 28, 2
@@ -537,8 +535,8 @@ Vside: # @Vside
 st.w $a7, $t0, -4
 st.w $a7, $t0, 16
 st.w $a7, $t0, 36
- xvaddi.du $xr4, $xr4, 4
- vaddi.wu $vr3, $vr3, 4
+ xvaddi.du $xr3, $xr3, 4
+ vaddi.wu $vr2, $vr2, 4
 addi.d $t1, $t1, -4
 addi.d $t0, $t0, 80
 bnez $t1, .LBB0_31
@@ -934,14 +932,13 @@ Hside: # @Hside
 move $a1, $a4
 bstrins.d $a1, $a6, 1, 0
 xvreplve0.d $xr1, $xr0
- vreplgr2vr.w $vr2, $s0
 pcalau12i $a6, %pc_hi20(.LCPI1_1)
- vld $vr3, $a6, %pc_lo12(.LCPI1_1)
+ vld $vr2, $a6, %pc_lo12(.LCPI1_1)
 pcalau12i $a6, %pc_hi20(.LCPI1_2)
- xvld $xr4, $a6, %pc_lo12(.LCPI1_2)
+ xvld $xr3, $a6, %pc_lo12(.LCPI1_2)
+ vreplgr2vr.w $vr4, $s0
 addi.d $a6, $a2, 48
- lu52i.d $a7, $zero, 1022
- xvreplgr2vr.d $xr5, $a7
+ xvldi $xr5, -928
 move $a7, $a5
 .p2align 4, , 16
 .LBB1_24: # %vector.body202
@@ -950,19 +947,19 @@ Hside: # @Hside
 st.w $fp, $a6, 8
 st.w $fp, $a6, 28
 st.w $fp, $a6, 48
- xvpickve2gr.d $t0, $xr4, 0
+ xvpickve2gr.d $t0, $xr3, 0
 vinsgr2vr.w $vr6, $t0, 0
- xvpickve2gr.d $t0, $xr4, 1
+ xvpickve2gr.d $t0, $xr3, 1
 vinsgr2vr.w $vr6, $t0, 1
- xvpickve2gr.d $t0, $xr4, 2
+ xvpickve2gr.d $t0, $xr3, 2
 vinsgr2vr.w $vr6, $t0, 2
- xvpickve2gr.d $t0, $xr4, 3
+ xvpickve2gr.d $t0, $xr3, 3
 vinsgr2vr.w $vr6, $t0, 3
 vaddi.wu $vr6, $vr6, 1
 vext2xv.du.wu $xr6, $xr6
 xvffint.d.lu $xr6, $xr6
 xvfmul.d $xr6, $xr1, $xr6
- vext2xv.du.wu $xr7, $xr3
+ vext2xv.du.wu $xr7, $xr2
 xvffint.d.lu $xr7, $xr7
 xvfmul.d $xr7, $xr1, $xr7
 xvftintrz.l.d $xr8, $xr7
@@ -981,7 +978,7 @@ Hside: # @Hside
 xvpickve2gr.d $t0, $xr9, 3
 vinsgr2vr.w $vr10, $t0, 3
 vsub.w $vr8, $vr8, $vr10
- vadd.w $vr9, $vr8, $vr2
+ vadd.w $vr9, $vr8, $vr4
 vstelm.w $vr9, $a6, -16, 0
 vstelm.w $vr9, $a6, 4, 1
 vstelm.w $vr9, $a6, 24, 2
@@ -1011,8 +1008,8 @@ Hside: # @Hside
 st.w $zero, $a6, -4
 st.w $zero, $a6, 16
 st.w $zero, $a6, 36
- xvaddi.du $xr4, $xr4, 4
- vaddi.wu $vr3, $vr3, 4
+ xvaddi.du $xr3, $xr3, 4
+ vaddi.wu $vr2, $vr2, 4
 addi.d $a7, $a7, -4
 addi.d $a6, $a6, 80
 bnez $a7, .LBB1_24
@@ -1081,14 +1078,13 @@ Hside: # @Hside
 move $a1, $a4
 bstrins.d $a1, $a6, 1, 0
 xvreplve0.d $xr1, $xr0
- vreplgr2vr.w $vr2, $s0
 pcalau12i $a6, %pc_hi20(.LCPI1_1)
- vld $vr3, $a6, %pc_lo12(.LCPI1_1)
+ vld $vr2, $a6, %pc_lo12(.LCPI1_1)
 pcalau12i $a6, %pc_hi20(.LCPI1_2)
- xvld $xr4, $a6, %pc_lo12(.LCPI1_2)
+ xvld $xr3, $a6, %pc_lo12(.LCPI1_2)
+ vreplgr2vr.w $vr4, $s0
 addi.d $a6, $a2, 48
- lu52i.d $a7, $zero, 1022
- xvreplgr2vr.d $xr5, $a7
+ xvldi $xr5, -928
 move $a7, $a5
 .p2align 4, , 16
 .LBB1_31: # %vector.body184
@@ -1097,19 +1093,19 @@ Hside: # @Hside
 st.w $fp, $a6, 8
 st.w $fp, $a6, 28
 st.w $fp, $a6, 48
- xvpickve2gr.d $t0, $xr4, 0
+ xvpickve2gr.d $t0, $xr3, 0
 vinsgr2vr.w $vr6, $t0, 0
- xvpickve2gr.d $t0, $xr4, 1
+ xvpickve2gr.d $t0, $xr3, 1
 vinsgr2vr.w $vr6, $t0, 1
- xvpickve2gr.d $t0, $xr4, 2
+ xvpickve2gr.d $t0, $xr3, 2
 vinsgr2vr.w $vr6, $t0, 2
- xvpickve2gr.d $t0, $xr4, 3
+ xvpickve2gr.d $t0, $xr3, 3
 vinsgr2vr.w $vr6, $t0, 3
 vaddi.wu $vr6, $vr6, 1
 vext2xv.du.wu $xr6, $xr6
 xvffint.d.lu $xr6, $xr6
 xvfmul.d $xr6, $xr1, $xr6
- vext2xv.du.wu $xr7, $xr3
+ vext2xv.du.wu $xr7, $xr2
 xvffint.d.lu $xr7, $xr7
 xvfmul.d $xr7, $xr1, $xr7
 xvftintrz.l.d $xr8, $xr7
@@ -1128,7 +1124,7 @@ Hside: # @Hside
 xvpickve2gr.d $t0, $xr9, 3
 vinsgr2vr.w $vr10, $t0, 3
 vsub.w $vr8, $vr8, $vr10
- vsub.w $vr9, $vr2, $vr8
+ vsub.w $vr9, $vr4, $vr8
 vstelm.w $vr9, $a6, -16, 0
 vstelm.w $vr9, $a6, 4, 1
 vstelm.w $vr9, $a6, 24, 2
@@ -1158,8 +1154,8 @@ Hside: # @Hside
 st.w $zero, $a6, -4
 st.w $zero, $a6, 16
 st.w $zero, $a6, 36
- xvaddi.du $xr4, $xr4, 4
- vaddi.wu $vr3, $vr3, 4
+ xvaddi.du $xr3, $xr3, 4
+ vaddi.wu $vr2, $vr2, 4
 addi.d $a7, $a7, -4
 addi.d $a6, $a6, 80
 bnez $a7, .LBB1_31
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mt.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mt.s
index 19253013..bfdb31f8 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mt.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mt.s
@@ -897,7 +897,7 @@ MTDecodeP: # @MTDecodeP
 stptr.d $zero, $a0, 3656
 stptr.w $a3, $a0, 3664
 stptr.w $a2, $a0, 3668
- vreplgr2vr.d $vr0, $a4
+ vldi $vr0, -1552
 ori $a4, $zero, 3640
 vstx $vr0, $a0, $a4
 ori $a4, $zero, 1
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/parser.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/parser.s
index 5f9e0aa2..09498273 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/parser.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/parser.s
@@ -29,20 +29,20 @@ parser: # @parser
 jirl $ra, $ra, 0
 ori $s8, $zero, 1
 move $s4, $zero
- st.d $zero, $sp, 24 # 8-byte Folded Spill
+ st.d $zero, $sp, 32 # 8-byte Folded Spill
 bne $a0, $s8, .LBB0_112
 # %bb.1: # %.lr.ph652.preheader
 pcalau12i $a0, %pc_hi20(.L.str.1)
 addi.d $fp, $a0, %pc_lo12(.L.str.1)
 pcalau12i $a0, %pc_hi20(.L.str.2)
 addi.d $a0, $a0, %pc_lo12(.L.str.2)
- st.d $a0, $sp, 64 # 8-byte Folded Spill
+ st.d $a0, $sp, 72 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $a0, $a0, %pc_lo12(.L.str.5)
 st.d $a0, $sp, 128 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str.8)
 addi.d $a0, $a0, %pc_lo12(.L.str.8)
- st.d $a0, $sp, 112 # 8-byte Folded Spill
+ st.d $a0, $sp, 96 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str.11)
 addi.d $s6, $a0, %pc_lo12(.L.str.11)
 pcalau12i $a0, %pc_hi20(.L.str)
@@ -51,15 +51,15 @@ parser: # @parser
 ori $a0, $a0, 3939
 lu32i.d $a0, 225893
 lu52i.d $a0, $a0, 7
- st.d $a0, $sp, 80 # 8-byte Folded Spill
+ st.d $a0, $sp, 88 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str.19)
 addi.d $a0, $a0, %pc_lo12(.L.str.19)
- st.d $a0, $sp, 56 # 8-byte Folded Spill
+ st.d $a0, $sp, 64 # 8-byte Folded Spill
 addi.w $a0, $zero, -9
- st.d $a0, $sp, 72 # 8-byte Folded Spill
+ st.d $a0, $sp, 80 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str.25)
 addi.d $a0, $a0, %pc_lo12(.L.str.25)
- st.d $a0, $sp, 48 # 8-byte Folded Spill
+ st.d $a0, $sp, 56 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str.17)
 addi.d $s3, $a0, %pc_lo12(.L.str.17)
 ori $s1, $zero, 2
@@ -67,23 +67,23 @@ parser: # @parser
 ori $s5, $a0, 2416
 pcalau12i $a0, %pc_hi20(.L.str.34)
 addi.d $a0, $a0, %pc_lo12(.L.str.34)
- st.d $a0, $sp, 32 # 8-byte Folded Spill
- st.d $zero, $sp, 24 # 8-byte Folded Spill
+ st.d $a0, $sp, 40 # 8-byte Folded Spill
+ st.d $zero, $sp, 32 # 8-byte Folded Spill
 move $s4, $zero
 lu12i.w $a0, 1606
 ori $a0, $a0, 368
- st.d $a0, $sp, 16 # 8-byte Folded Spill
+ st.d $a0, $sp, 24 # 8-byte Folded Spill
 lu12i.w $a0, 472646
 ori $a0, $a0, 368
 lu32i.d $a0, 353385
 lu52i.d $a0, $a0, 6
- st.d $a0, $sp, 8 # 8-byte Folded Spill
+ st.d $a0, $sp, 16 # 8-byte Folded Spill
 # implicit-def: $r25
- st.d $fp, $sp, 40 # 8-byte Folded Spill
+ st.d $fp, $sp, 48 # 8-byte Folded Spill
 b .LBB0_4
 .p2align 4, , 16
 .LBB0_2: # in Loop: Header=BB0_4 Depth=1
- ld.d $fp, $sp, 40 # 8-byte Folded Reload
+ ld.d $fp, $sp, 48 # 8-byte Folded Reload
 .LBB0_3: # %.loopexit
 # in Loop: Header=BB0_4 Depth=1
 addi.d $a2, $sp, 152
@@ -144,7 +144,7 @@ parser: # @parser
 beqz $a0, .LBB0_48
 # %bb.10: # in Loop: Header=BB0_4 Depth=1
 ld.w $a0, $sp, 152
- ld.d $a1, $sp, 16 # 8-byte Folded Reload
+ ld.d $a1, $sp, 24 # 8-byte Folded Reload
 beq $a0, $a1, .LBB0_88
 # %bb.11: # in Loop: Header=BB0_4 Depth=1
 pcalau12i $a0, %pc_hi20(.L.str.72)
@@ -166,7 +166,7 @@ parser: # @parser
 .LBB0_13: # in Loop: Header=BB0_4 Depth=1
 addi.d $a2, $sp, 1204
 move $a0, $s0
- ld.d $a1, $sp, 64 # 8-byte Folded Reload
+ ld.d $a1, $sp, 72 # 8-byte Folded Reload
 pcaddu18i $ra, %call36(__isoc99_fscanf)
 jirl $ra, $ra, 0
 bne $a0, $s8, .LBB0_149
@@ -181,7 +181,7 @@ parser: # @parser
 # %bb.15: # in Loop: Header=BB0_4 Depth=1
 addi.d $a0, $sp, 152
 ori $a2, $zero, 5
- ld.d $a1, $sp, 112 # 8-byte Folded Reload
+ ld.d $a1, $sp, 96 # 8-byte Folded Reload
 pcaddu18i $ra, %call36(bcmp)
 jirl $ra, $ra, 0
 bnez $a0, .LBB0_132
@@ -211,7 +211,7 @@ parser: # @parser
 pcaddu18i $ra, %call36(__isoc99_fscanf)
 jirl $ra, $ra, 0
 ld.d $a0, $sp, 152
- ld.d $a1, $sp, 80 # 8-byte Folded Reload
+ ld.d $a1, $sp, 88 # 8-byte Folded Reload
 bne $a0, $a1, .LBB0_139
 # %bb.20: # %.preheader399
 # in Loop: Header=BB0_4 Depth=1
@@ -256,7 +256,7 @@ parser: # @parser
 jirl $ra, $ra, 0
 addi.d $a0, $sp, 152
 ori $a2, $zero, 6
- ld.d $a1, $sp, 56 # 8-byte Folded Reload
+ ld.d $a1, $sp, 64 # 8-byte Folded Reload
 pcaddu18i $ra, %call36(bcmp)
 jirl $ra, $ra, 0
 bnez $a0, .LBB0_136
@@ -277,7 +277,7 @@ parser: # @parser
 # %bb.29: # in Loop: Header=BB0_4 Depth=1
 ld.w $a0, $sp, 1232
 addi.w $a0, $a0, -9
- ld.d $a1, $sp, 72 # 8-byte Folded Reload
+ ld.d $a1, $sp, 80 # 8-byte Folded Reload
 bgeu $a1, $a0, .LBB0_141
 # %bb.30: # in Loop: Header=BB0_4 Depth=1
 addi.d $a2, $sp, 152
@@ -287,7 +287,7 @@ parser: # @parser
 jirl $ra, $ra, 0
 addi.d $a0, $sp, 152
 ori $a2, $zero, 13
- ld.d $a1, $sp, 48 # 8-byte Folded Reload
+ ld.d $a1, $sp, 56 # 8-byte Folded Reload
 pcaddu18i $ra, %call36(bcmp)
 jirl $ra, $ra, 0
 bnez $a0, .LBB0_138
@@ -332,7 +332,7 @@ parser: # @parser
 # %bb.38: # in Loop: Header=BB0_4 Depth=1
 addi.d $a0, $sp, 152
 ori $a2, $zero, 5
- ld.d $a1, $sp, 112 # 8-byte Folded Reload
+ ld.d $a1, $sp, 96 # 8-byte Folded Reload
 pcaddu18i $ra, %call36(bcmp)
 jirl $ra, $ra, 0
 bnez $a0, .LBB0_148
@@ -353,7 +353,7 @@ parser: # @parser
 # %bb.41: # in Loop: Header=BB0_4 Depth=1
 addi.d $a0, $sp, 152
 ori $a2, $zero, 7
- ld.d $a1, $sp, 32 # 8-byte Folded Reload
+ ld.d $a1, $sp, 40 # 8-byte Folded Reload
 pcaddu18i $ra, %call36(bcmp)
 jirl $ra, $ra, 0
 bnez $a0, .LBB0_152
@@ -571,7 +571,7 @@ parser: # @parser
 pcaddu18i $ra, %call36(__isoc99_fscanf)
 jirl $ra, $ra, 0
 ld.d $a0, $sp, 152
- ld.d $a1, $sp, 80 # 8-byte Folded Reload
+ ld.d $a1, $sp, 88 # 8-byte Folded Reload
 bne $a0, $a1, .LBB0_139
 # %bb.69: # %.preheader402
 # in Loop: Header=BB0_4 Depth=1
@@ -687,7 +687,7 @@ parser: # @parser
 # %bb.82: # in Loop: Header=BB0_4 Depth=1
 ld.w $a0, $sp, 1232
 addi.w $a0, $a0, -9
- ld.d $a1, $sp, 72 # 8-byte Folded Reload
+ ld.d $a1, $sp, 80 # 8-byte Folded Reload
 bgeu $a1, $a0, .LBB0_141
 # %bb.83: # in Loop: Header=BB0_4 Depth=1
 pcalau12i $a0, %pc_hi20(.L.str)
@@ -779,7 +779,7 @@ parser: # @parser
 pcaddu18i $ra, %call36(__isoc99_fscanf)
 jirl $ra, $ra, 0
 ld.d $a0, $sp, 152
- ld.d $a1, $sp, 80 # 8-byte Folded Reload
+ ld.d $a1, $sp, 88 # 8-byte Folded Reload
 bne $a0, $a1, .LBB0_139
 # %bb.95: # %.preheader406
 # in Loop: Header=BB0_4 Depth=1
@@ -819,7 +819,7 @@ parser: # @parser
 blt $s2, $a0, .LBB0_99
 .LBB0_101: # %._crit_edge
 # in Loop: Header=BB0_4 Depth=1
- ld.d $s2, $sp, 24 # 8-byte Folded Reload
+ ld.d $s2, $sp, 32 # 8-byte Folded Reload
 addi.w $s2, $s2, 1
 pcalau12i $a0, %pc_hi20(.L.str)
 addi.d $a1, $a0, %pc_lo12(.L.str)
@@ -828,7 +828,7 @@ parser: # @parser
 pcaddu18i $ra, %call36(__isoc99_fscanf)
 jirl $ra, $ra, 0
 ld.d $a0, $sp, 152
- ld.d $a1, $sp, 8 # 8-byte Folded Reload
+ ld.d $a1, $sp, 16 # 8-byte Folded Reload
 bne $a0, $a1, .LBB0_170
 # %bb.102: # in Loop: Header=BB0_4 Depth=1
 pcalau12i $a0, %pc_hi20(.L.str)
@@ -871,10 +871,10 @@ parser: # @parser
 # %bb.106: # in Loop: Header=BB0_4 Depth=1
 ld.w $a0, $sp, 1232
 addi.w $a0, $a0, -9
- ld.d $a1, $sp, 72 # 8-byte Folded Reload
+ ld.d $a1, $sp, 80 # 8-byte Folded Reload
 bgeu $a1, $a0, .LBB0_141
 # %bb.107: # in Loop: Header=BB0_4 Depth=1
- st.d $s2, $sp, 24 # 8-byte Folded Spill
+ st.d $s2, $sp, 32 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.L.str)
 addi.d $a1, $a0, %pc_lo12(.L.str)
 addi.d $a2, $sp, 152
@@ -918,7 +918,7 @@ parser: # @parser
 st.w $s4, $fp, 0
 pcalau12i $a0, %got_pc_hi20(numpads)
 ld.d $s1, $a0, %got_pc_lo12(numpads)
- ld.d $a0, $sp, 24 # 8-byte Folded Reload
+ ld.d $a0, $sp, 32 # 8-byte Folded Reload
 st.w $a0, $s1, 0
 move $a0, $s0
 pcaddu18i $ra, %call36(rewind)
@@ -949,10 +949,8 @@ parser: # @parser
 vrepli.b $vr0, 0
 vst $vr0, $sp, 128 # 16-byte Folded Spill
 lu52i.d $s2, $zero, 1023
- vreplgr2vr.d $vr0, $s2
- vst $vr0, $sp, 112 # 16-byte Folded Spill
 xvrepli.b $xr0, 0
- xvst $xr0, $sp, 80 # 32-byte Folded Spill
+ xvst $xr0, $sp, 96 # 32-byte Folded Spill
 .p2align 4, , 16
 .LBB0_114: # %.lr.ph660
 # =>This Inner Loop Header: Depth=1
@@ -966,9 +964,9 @@ parser: # @parser
 vld $vr0, $sp, 128 # 16-byte Folded Reload
 vst $vr0, $a0, 68
 st.d $s2, $a0, 96
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vldi $vr0, -912
 vst $vr0, $a0, 112
- xvld $xr0, $sp, 80 # 32-byte Folded Reload
+ xvld $xr0, $sp, 96 # 32-byte Folded Reload
 xvst $xr0, $a0, 20
 xvst $xr0, $a0, 152
 xvst $xr0, $a0, 184
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/setpwates.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/setpwates.s
index 589b0729..e505b9c0 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/setpwates.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/setpwates.s
@@ -16,11 +16,10 @@ setpwates: # @setpwates
 pcalau12i $a2, %got_pc_hi20(pinsPerLen)
 ld.d $a2, $a2, %got_pc_lo12(pinsPerLen)
 addi.d $a1, $a1, 1
- bstrpick.d $a1, $a1, 31, 0
 xvldrepl.d $xr0, $a2, 0
+ bstrpick.d $a1, $a1, 31, 0
 ori $a2, $zero, 1
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr1, $a3
+ xvldi $xr1, -912
 b .LBB0_3
 .p2align 4, , 16
 .LBB0_2: # %._crit_edge
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s
index bf5d29e2..cf0b8adb 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s
@@ -494,8 +494,7 @@ uaspect: # @uaspect
 vreplgr2vr.w $vr3, $s3
 vreplgr2vr.w $vr4, $s4
 addi.d $a3, $s8, 40
- lu52i.d $a4, $zero, 1022
- xvreplgr2vr.d $xr5, $a4
+ xvldi $xr5, -928
 move $a4, $a2
 .p2align 4, , 16
 .LBB0_23: # %vector.body
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/watesides.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/watesides.s
index 991df1c7..7d68df72 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/watesides.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/watesides.s
@@ -19,7 +19,7 @@ watesides: # @watesides
 addi.d $a4, $a4, 44
 addi.d $a5, $a5, -1
 lu52i.d $a6, $zero, -1025
- vreplgr2vr.d $vr0, $a6
+ vldi $vr0, -784
 ori $a7, $zero, 1
 b .LBB0_4
 .p2align 4, , 16
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s
index a7b6cdba..a912b3e4 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s
@@ -891,7 +891,7 @@ findratio: # @findratio
 fst.d $fa0, $sp, 640 # 8-byte Folded Spill
 vld $vr0, $sp, 1440 # 16-byte Folded Reload
 fadd.d $fa0, $fa0, $fa0
- vst $vr0, $sp, 1024 # 16-byte Folded Spill
+ vst $vr0, $sp, 1056 # 16-byte Folded Spill
 fmov.d $fa0, $fs1
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
@@ -1028,7 +1028,7 @@ findratio: # @findratio
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
 # kill: def $f0_64 killed $f0_64 def $vr0
- vst $vr0, $sp, 592 # 16-byte Folded Spill
+ vst $vr0, $sp, 608 # 16-byte Folded Spill
 fmov.d $fa0, $fs0
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
@@ -1133,7 +1133,7 @@ findratio: # @findratio
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
 # kill: def $f0_64 killed $f0_64 def $vr0
- vst $vr0, $sp, 704 # 16-byte Folded Spill
+ vst $vr0, $sp, 720 # 16-byte Folded Spill
 fmov.d $fa0, $fs0
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
@@ -1233,7 +1233,7 @@ findratio: # @findratio
 fsub.d $fa0, $fa2, $fa1
 vld $vr1, $sp, 1536 # 16-byte Folded Reload
 fmul.d $fa0, $fa1, $fa0
- vst $vr0, $sp, 608 # 16-byte Folded Spill
+ vst $vr0, $sp, 592 # 16-byte Folded Spill
 fld.d $fs3, $sp, 1312 # 8-byte Folded Reload
 fmul.d $fs0, $fa3, $fs3
 fmov.d $fa0, $fs0
@@ -1262,7 +1262,7 @@ findratio: # @findratio
 fld.d $fa1, $s2, %pc_lo12(c)
 vst $vr1, $sp, 1424 # 16-byte Folded Spill
 # kill: def $f0_64 killed $f0_64 def $vr0
- vst $vr0, $sp, 1056 # 16-byte Folded Spill
+ vst $vr0, $sp, 1040 # 16-byte Folded Spill
 fmul.d $fs2, $fa1, $fs6
 fmov.d $fa0, $fs2
 pcaddu18i $ra, %call36(exp)
@@ -1273,7 +1273,7 @@ findratio: # @findratio
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
 # kill: def $f0_64 killed $f0_64 def $vr0
- vst $vr0, $sp, 1040 # 16-byte Folded Spill
+ vst $vr0, $sp, 1024 # 16-byte Folded Spill
 vld $vr0, $sp, 1424 # 16-byte Folded Reload
 fmul.d $fs3, $fa0, $fs3
 fmov.d $fa0, $fs3
@@ -1285,7 +1285,7 @@ findratio: # @findratio
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
 # kill: def $f0_64 killed $f0_64 def $vr0
- vst $vr0, $sp, 720 # 16-byte Folded Spill
+ vst $vr0, $sp, 704 # 16-byte Folded Spill
 fmov.d $fa0, $fs2
 pcaddu18i $ra, %call36(exp)
 jirl $ra, $ra, 0
@@ -1327,13 +1327,13 @@ findratio: # @findratio
 fdiv.d $fa0, $fa0, $fs7
 fld.d $fa1, $sp, 176 # 8-byte Folded Reload
 fdiv.d $fa1, $fa1, $fa2
- vori.b $vr11, $vr2, 0
+ vori.b $vr3, $vr2, 0
 fsub.d $fa0, $fa0, $fa1
 fld.d $fa1, $sp, 168 # 8-byte Folded Reload
 fmadd.d $fa0, $fa0, $fa4, $fa1
 vst $vr0, $sp, 1008 # 16-byte Folded Spill
 vori.b $vr0, $vr7, 0
- vori.b $vr3, $vr7, 0
+ vori.b $vr4, $vr7, 0
 vst $vr7, $sp, 1312 # 16-byte Folded Spill
 vextrins.d $vr0, $vr10, 0
 vori.b $vr2, $vr10, 0
@@ -1353,8 +1353,8 @@ findratio: # @findratio
 fdiv.d $fa1, $ft10, $ft6
 fsub.d $fa0, $fa0, $fa1
 fdiv.d $fa1, $fa7, $fs7
- vori.b $vr17, $vr11, 0
- vst $vr11, $sp, 32 # 16-byte Folded Spill
+ vori.b $vr17, $vr3, 0
+ vst $vr3, $sp, 32 # 16-byte Folded Spill
 fdiv.d $ft3, $ft10, $ft9
 fsub.d $fa1, $fa1, $ft3
 fneg.d $fa1, $fa1
@@ -1364,11 +1364,11 @@ findratio: # @findratio
 fmadd.d $ft3, $fa0, $ft4, $fa1
 vld $vr15, $sp, 144 # 16-byte Folded Reload
 fdiv.d $fa0, $ft7, $ft5
- vld $vr4, $sp, 192 # 16-byte Folded Reload
- fdiv.d $fa1, $fa4, $ft6
+ vld $vr3, $sp, 192 # 16-byte Folded Reload
+ fdiv.d $fa1, $fa3, $ft6
 fsub.d $fa0, $fa0, $fa1
 fdiv.d $fa1, $ft7, $fs7
- fdiv.d $ft9, $fa4, $ft9
+ fdiv.d $ft9, $fa3, $ft9
 fsub.d $fa1, $fa1, $ft9
 fneg.d $fa1, $fa1
 fmul.d $fa1, $ft8, $fa1
@@ -1377,24 +1377,24 @@ findratio: # @findratio
 vextrins.d $vr0, $vr7, 16
 vld $vr1, $sp, 1280 # 16-byte Folded Reload
 vextrins.d $vr1, $vr18, 16
- vpackod.d $vr28, $vr5, $vr3
+ vpackod.d $vr28, $vr5, $vr4
 vfdiv.d $vr0, $vr0, $vr28
 vpackod.d $vr27, $vr6, $vr8
 vfdiv.d $vr1, $vr1, $vr27
 vfsub.d $vr0, $vr0, $vr1
- vld $vr3, $sp, 128 # 16-byte Folded Reload
- vextrins.d $vr3, $vr11, 16
+ vld $vr4, $sp, 128 # 16-byte Folded Reload
+ vextrins.d $vr4, $vr11, 16
 vldi $vr1, -928
 vld $vr7, $sp, 1440 # 16-byte Folded Reload
 vpackev.d $vr1, $vr7, $vr1
- vfmadd.d $vr0, $vr1, $vr0, $vr3
+ vfmadd.d $vr0, $vr1, $vr0, $vr4
 vst $vr0, $sp, 1280 # 16-byte Folded Spill
 vld $vr0, $sp, 112 # 16-byte Folded Reload
 vextrins.d $vr0, $vr15, 16
- vld $vr3, $sp, 1296 # 16-byte Folded Reload
- vextrins.d $vr3, $vr4, 16
+ vld $vr4, $sp, 1296 # 16-byte Folded Reload
+ vextrins.d $vr4, $vr3, 16
 vfdiv.d $vr0, $vr0, $vr28
- vfdiv.d $vr17, $vr3, $vr27
+ vfdiv.d $vr17, $vr4, $vr27
 vfsub.d $vr0, $vr0, $vr17
 vld $vr3, $sp, 208 # 16-byte Folded Reload
 vextrins.d $vr3, $vr12, 16
@@ -1494,7 +1494,7 @@ findratio: # @findratio
 vfmadd.d $vr1, $vr21, $vr23, $vr1
 vld $vr9, $sp, 528 # 16-byte Folded Reload
 fdiv.d $ft13, $ft1, $ft9
- vld $vr4, $sp, 592 # 16-byte Folded Reload
+ vld $vr4, $sp, 608 # 16-byte Folded Reload
 fdiv.d $fs2, $fa4, $ft10
 fsub.d $ft13, $ft13, $fs2
 vextrins.d $vr9, $vr15, 16
@@ -1515,7 +1515,7 @@ findratio: # @findratio
 vfdiv.d $vr26, $vr2, $vr21
 vfsub.d $vr19, $vr19, $vr26
 vld $vr2, $sp, 1440 # 16-byte Folded Reload
- vld $vr3, $sp, 1024 # 16-byte Folded Reload
+ vld $vr3, $sp, 1056 # 16-byte Folded Reload
 vextrins.d $vr2, $vr3, 16
 vfmadd.d $vr3, $vr2, $vr19, $vr22
 vst $vr3, $sp, 1568 # 16-byte Folded Spill
@@ -1538,7 +1538,7 @@ findratio: # @findratio
 vfdiv.d $vr1, $vr4, $vr22
 vfsub.d $vr0, $vr0, $vr1
 vfmadd.d $vr0, $vr2, $vr0, $vr23
- vst $vr0, $sp, 1024 # 16-byte Folded Spill
+ vst $vr0, $sp, 1056 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_14)
 fld.d $fa0, $a0, %pc_lo12(.LCPI2_14)
 fld.d $fa1, $sp, 568 # 8-byte Folded Reload
@@ -1561,7 +1561,7 @@ findratio: # @findratio
 vld $vr0, $sp, 320 # 16-byte Folded Reload
 vextrins.d $vr0, $vr2, 16
 vfdiv.d $vr0, $vr0, $vr28
- vld $vr4, $sp, 704 # 16-byte Folded Reload
+ vld $vr4, $sp, 720 # 16-byte Folded Reload
 vld $vr1, $sp, 336 # 16-byte Folded Reload
 vextrins.d $vr1, $vr4, 16
 vfdiv.d $vr1, $vr1, $vr28
@@ -1598,11 +1598,10 @@ findratio: # @findratio
 vextrins.d $vr2, $vr5, 16
 vfdiv.d $vr30, $vr2, $vr27
 vfsub.d $vr0, $vr0, $vr30
- vld $vr2, $sp, 608 # 16-byte Folded Reload
+ vld $vr2, $sp, 592 # 16-byte Folded Reload
 vextrins.d $vr2, $vr28, 16
 vfmul.d $vr28, $vr3, $vr15
- lu52i.d $a0, $zero, -1026
- vreplgr2vr.d $vr15, $a0
+ vldi $vr15, -800
 vpackev.d $vr30, $vr28, $vr15
 vfmadd.d $vr22, $vr0, $vr30, $vr2
 vld $vr0, $sp, 1328 # 16-byte Folded Reload
@@ -1656,7 +1655,7 @@ findratio: # @findratio
 fmul.d $fa1, $fa0, $fa3
 vld $vr9, $sp, 688 # 16-byte Folded Reload
 fdiv.d $fa0, $ft1, $fa1
- vld $vr8, $sp, 720 # 16-byte Folded Reload
+ vld $vr8, $sp, 704 # 16-byte Folded Reload
 fdiv.d $ft3, $ft0, $fa2
 fsub.d $fa0, $fa0, $ft3
 vextrins.d $vr9, $vr4, 16
@@ -1699,7 +1698,7 @@ findratio: # @findratio
 vfmadd.d $vr1, $vr2, $vr12, $vr1
 vld $vr11, $sp, 992 # 16-byte Folded Reload
 fdiv.d $fa2, $ft3, $fa3
- vld $vr5, $sp, 1056 # 16-byte Folded Reload
+ vld $vr5, $sp, 1040 # 16-byte Folded Reload
 fdiv.d $fa4, $fa5, $fs5
 fsub.d $fa2, $fa2, $fa4
 vextrins.d $vr11, $vr10, 16
@@ -1714,7 +1713,7 @@ findratio: # @findratio
 vfmadd.d $vr2, $vr14, $vr4, $vr2
 vld $vr6, $sp, 976 # 16-byte Folded Reload
 fdiv.d $fa3, $fa6, $fa3
- vld $vr11, $sp, 1040 # 16-byte Folded Reload
+ vld $vr11, $sp, 1024 # 16-byte Folded Reload
 fdiv.d $fa4, $ft3, $fs5
 fsub.d $fa3, $fa3, $fa4
 vextrins.d $vr6, $vr19, 16
@@ -1787,7 +1786,7 @@ findratio: # @findratio
 vfsub.d $vr4, $vr4, $vr8
 vld $vr8, $sp, 1440 # 16-byte Folded Reload
 vfadd.d $vr4, $vr4, $vr8
- vld $vr8, $sp, 1024 # 16-byte Folded Reload
+ vld $vr8, $sp, 1056 # 16-byte Folded Reload
 vfsub.d $vr4, $vr4, $vr8
 fld.d $ft0, $sp, 1528 # 8-byte Folded Reload
 fmul.d $ft0, $ft0, $fs2
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/agrep/CMakeFiles/agrep.dir/sgrep.s b/results/MultiSource/Benchmarks/Prolangs-C/agrep/CMakeFiles/agrep.dir/sgrep.s
index a1423ea0..26db7b3d 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/agrep/CMakeFiles/agrep.dir/sgrep.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/agrep/CMakeFiles/agrep.dir/sgrep.s
@@ -1328,7 +1328,7 @@ initmask: # @initmask
 move $a2, $a1
 bstrins.d $a2, $zero, 3, 0
 xvrepli.b $xr1, 0
- xvreplgr2vr.w $xr2, $s2
+ xvldi $xr2, -3200
 move $a3, $a2
 xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
@@ -1366,7 +1366,7 @@ initmask: # @initmask
 vreplgr2vr.w $vr2, $a5
 vor.v $vr1, $vr2, $vr1
 sub.d $a3, $a5, $a2
- vreplgr2vr.w $vr2, $s2
+ vldi $vr2, -3200
 .p2align 4, , 16
 .LBB4_11: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
diff --git a/results/MultiSource/Benchmarks/Prolangs-C/bison/CMakeFiles/mybison.dir/output.s b/results/MultiSource/Benchmarks/Prolangs-C/bison/CMakeFiles/mybison.dir/output.s
index 98345ba7..00d9eb06 100644
--- a/results/MultiSource/Benchmarks/Prolangs-C/bison/CMakeFiles/mybison.dir/output.s
+++ b/results/MultiSource/Benchmarks/Prolangs-C/bison/CMakeFiles/mybison.dir/output.s
@@ -2545,64 +2545,64 @@ pack_table: # @pack_table
 st.w $zero, $a1, %pc_lo12(high)
 blez $s4, .LBB17_14
 # %bb.1: # %iter.check
- ld.d $a2, $s3, %pc_lo12(base)
- ori $a3, $zero, 7
- lu12i.w $a1, 8
- bltu $a3, $s4, .LBB17_3
+ ld.d $a1, $s3, %pc_lo12(base)
+ ori $a2, $zero, 7
+ bltu $a2, $s4, .LBB17_3
 # %bb.2:
- move $a3, $zero
+ move $a2, $zero
 b .LBB17_12
 .LBB17_3: # %vector.main.loop.iter.check
- ori $a3, $zero, 32
- bgeu $s4, $a3, .LBB17_5
+ ori $a2, $zero, 32
+ bgeu $s4, $a2, .LBB17_5
 # %bb.4:
- move $a3, $zero
+ move $a2, $zero
 b .LBB17_9
 .LBB17_5: # %vector.ph
- bstrpick.d $a3, $s4, 30, 5
- slli.d $a3, $a3, 5
- addi.d $a4, $a2, 32
- xvreplgr2vr.h $xr0, $a1
- move $a5, $a3
+ bstrpick.d $a2, $s4, 30, 5
+ slli.d $a2, $a2, 5
+ addi.d $a3, $a1, 32
+ xvldi $xr0, -2688
+ move $a4, $a2
 .p2align 4, , 16
 .LBB17_6: # %vector.body
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a4, -32
- xvst $xr0, $a4, 0
- addi.d $a5, $a5, -32
- addi.d $a4, $a4, 64
- bnez $a5, .LBB17_6
+ xvst $xr0, $a3, -32
+ xvst $xr0, $a3, 0
+ addi.d $a4, $a4, -32
+ addi.d $a3, $a3, 64
+ bnez $a4, .LBB17_6
 # %bb.7: # %middle.block
- beq $a3, $s4, .LBB17_14
+ beq $a2, $s4, .LBB17_14
 # %bb.8: # %vec.epilog.iter.check
- andi $a4, $s4, 24
- beqz $a4, .LBB17_12
+ andi $a3, $s4, 24
+ beqz $a3, .LBB17_12
 .LBB17_9: # %vec.epilog.ph
- move $a5, $a3
- bstrpick.d $a3, $s4, 30, 3
- slli.d $a3, $a3, 3
- sub.d $a4, $a5, $a3
- alsl.d $a5, $a5, $a2, 1
- vreplgr2vr.h $vr0, $a1
+ move $a4, $a2
+ bstrpick.d $a2, $s4, 30, 3
+ slli.d $a2, $a2, 3
+ sub.d $a3, $a4, $a2
+ alsl.d $a4, $a4, $a1, 1
+ vldi $vr0, -2688
 .p2align 4, , 16
 .LBB17_10: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
- vst $vr0, $a5, 0
- addi.d $a4, $a4, 8
- addi.d $a5, $a5, 16
- bnez $a4, .LBB17_10
+ vst $vr0, $a4, 0
+ addi.d $a3, $a3, 8
+ addi.d $a4, $a4, 16
+ bnez $a3, .LBB17_10
 # %bb.11: # %vec.epilog.middle.block
- beq $a3, $s4, .LBB17_14
+ beq $a2, $s4, .LBB17_14
 .LBB17_12: # %vec.epilog.scalar.ph.preheader
- alsl.d $a2, $a3, $a2, 1
- sub.d $a3, $s4, $a3
+ alsl.d $a1, $a2, $a1, 1
+ sub.d $a2, $s4, $a2
+ lu12i.w $a3, 8
 .p2align 4, , 16
 .LBB17_13: # %vec.epilog.scalar.ph
 # =>This Inner Loop Header: Depth=1
- st.h $a1, $a2, 0
- addi.d $a3, $a3, -1
- addi.d $a2, $a2, 2
- bnez $a3, .LBB17_13
+ st.h $a3, $a1, 0
+ addi.d $a2, $a2, -1
+ addi.d $a1, $a1, 2
+ bnez $a2, .LBB17_13
 .LBB17_14: # %.preheader34
 ori $a1, $zero, 255
 move $a2, $fp
@@ -3299,8 +3299,7 @@ action_row: # @action_row
 sub.d $a3, $a4, $a2
 alsl.d $a4, $a4, $a1, 1
 addi.d $a4, $a4, 8
- lu12i.w $a5, 8
- vreplgr2vr.h $vr0, $a5
+ vldi $vr0, -2688
 b .LBB21_39
 .p2align 4, , 16
 .LBB21_38: # %pred.store.continue346
@@ -3503,8 +3502,7 @@ action_row: # @action_row
 bstrpick.d $a2, $s0, 30, 4
 slli.d $a2, $a2, 4
 addi.d $a3, $a1, 16
- lu12i.w $a4, 8
- xvreplgr2vr.h $xr0, $a4
+ xvldi $xr0, -2688
 move $a4, $a2
 b .LBB21_69
 .p2align 4, , 16
diff --git a/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-1.s b/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-1.s
index 3c03695a..063b79c6 100644
--- a/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-1.s
+++ b/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-1.s
@@ -341,68 +341,68 @@ NetsToModules: # @NetsToModules
 ComputeNetCosts: # @ComputeNetCosts
 # %bb.0:
 pcalau12i $a0, %pc_hi20(numNets)
- ld.d $a1, $a0, %pc_lo12(numNets)
- beqz $a1, .LBB2_14
+ ld.d $a0, $a0, %pc_lo12(numNets)
+ beqz $a0, .LBB2_14
 # %bb.1: # %iter.check
- ori $a3, $zero, 4
- pcalau12i $a0, %pc_hi20(cost)
- addi.d $a2, $a0, %pc_lo12(cost)
- lu12i.w $a0, 260096
- bgeu $a1, $a3, .LBB2_3
+ ori $a2, $zero, 4
+ pcalau12i $a1, %pc_hi20(cost)
+ addi.d $a1, $a1, %pc_lo12(cost)
+ bgeu $a0, $a2, .LBB2_3
 # %bb.2:
- move $a3, $zero
+ move $a2, $zero
 b .LBB2_12
 .LBB2_3: # %vector.main.loop.iter.check
- ori $a3, $zero, 16
- bgeu $a1, $a3, .LBB2_5
+ ori $a2, $zero, 16
+ bgeu $a0, $a2, .LBB2_5
 # %bb.4:
- move $a3, $zero
+ move $a2, $zero
 b .LBB2_9
 .LBB2_5: # %vector.ph
- addi.d $a4, $a2, 32
- move $a3, $a1
- bstrins.d $a3, $zero, 3, 0
- xvreplgr2vr.w $xr0, $a0
- move $a5, $a3
+ addi.d $a3, $a1, 32
+ move $a2, $a0
+ bstrins.d $a2, $zero, 3, 0
+ xvldi $xr0, -1424
+ move $a4, $a2
 .p2align 4, , 16
 .LBB2_6: # %vector.body
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a4, -32
- xvst $xr0, $a4, 0
- addi.d $a5, $a5, -16
- addi.d $a4, $a4, 64
- bnez $a5, .LBB2_6
+ xvst $xr0, $a3, -32
+ xvst $xr0, $a3, 0
+ addi.d $a4, $a4, -16
+ addi.d $a3, $a3, 64
+ bnez $a4, .LBB2_6
 # %bb.7: # %middle.block
- beq $a1, $a3, .LBB2_14
+ beq $a0, $a2, .LBB2_14
 # %bb.8: # %vec.epilog.iter.check
- andi $a4, $a1, 12
- beqz $a4, .LBB2_12
+ andi $a3, $a0, 12
+ beqz $a3, .LBB2_12
 .LBB2_9: # %vec.epilog.ph
- move $a5, $a3
- move $a3, $a1
- bstrins.d $a3, $zero, 1, 0
- sub.d $a4, $a5, $a3
- alsl.d $a5, $a5, $a2, 2
- vreplgr2vr.w $vr0, $a0
+ move $a4, $a2
+ move $a2, $a0
+ bstrins.d $a2, $zero, 1, 0
+ sub.d $a3, $a4, $a2
+ alsl.d $a4, $a4, $a1, 2
+ vldi $vr0, -1424
 .p2align 4, , 16
 .LBB2_10: # %vec.epilog.vector.body
 # =>This Inner Loop Header: Depth=1
- vst $vr0, $a5, 0
- addi.d $a4, $a4, 4
- addi.d $a5, $a5, 16
- bnez $a4, .LBB2_10
+ vst $vr0, $a4, 0
+ addi.d $a3, $a3, 4
+ addi.d $a4, $a4, 16
+ bnez $a3, .LBB2_10
 # %bb.11: # %vec.epilog.middle.block
- beq $a1, $a3, .LBB2_14
+ beq $a0, $a2, .LBB2_14
 .LBB2_12: # %.lr.ph.preheader
- sub.d $a1, $a1, $a3
- alsl.d $a2, $a3, $a2, 2
+ sub.d $a0, $a0, $a2
+ alsl.d $a1, $a2, $a1, 2
+ lu12i.w $a2, 260096
 .p2align 4, , 16
 .LBB2_13: # %.lr.ph
 # =>This Inner Loop Header: Depth=1
- st.w $a0, $a2, 0
- addi.d $a1, $a1, -1
- addi.d $a2, $a2, 4
- bnez $a1, .LBB2_13
+ st.w $a2, $a1, 0
+ addi.d $a0, $a0, -1
+ addi.d $a1, $a1, 4
+ bnez $a0, .LBB2_13
 .LBB2_14: # %._crit_edge
 ret
 .Lfunc_end2:
diff --git a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s
index 2e911438..994811ec 100644
--- a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s
+++ b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/hotspotKernel.s
@@ -99,9 +99,8 @@ hotspotKernel: # @hotspotKernel
 ori $s6, $zero, 4088
 ori $s7, $zero, 511
 ori $s8, $zero, 4080
- lu52i.d $a2, $zero, -1024
- xvreplgr2vr.d $xr11, $a2
 lu12i.w $ra, -1
+ xvldi $xr11, -896
 lu12i.w $s5, -3
 lu12i.w $t7, -2
 lu12i.w $t8, 4
diff --git a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s
index 2cbc24c7..a871002e 100644
--- a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s
+++ b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s
@@ -29,197 +29,199 @@ srad_kernel: # @srad_kernel
 st.d $s6, $sp, 280 # 8-byte Folded Spill
 st.d $s7, $sp, 272 # 8-byte Folded Spill
 st.d $s8, $sp, 264 # 8-byte Folded Spill
- move $ra, $a6
+ move $s4, $a3
+ move $ra, $a2
 move $s6, $a1
- move $s7, $a0
- move $s1, $zero
+ move $s8, $a0
+ move $s0, $zero
 lu12i.w $a0, 2047
- add.d $s8, $a6, $a0
+ add.d $s1, $a6, $a0
 lu12i.w $a1, 2046
 ori $t7, $a1, 4092
- add.d $t8, $s7, $t7
+ add.d $t8, $s8, $t7
 add.d $a4, $s6, $t7
- add.d $fp, $a2, $t7
- add.d $s0, $a3, $t7
+ add.d $a3, $a2, $t7
+ add.d $fp, $s4, $t7
 addu16i.d $a7, $a5, 128
 addi.d $a7, $a7, -4
 lu12i.w $t0, 1
 ori $s2, $t0, 4
- add.d $s3, $s7, $s2
- add.d $s4, $s6, $s2
- add.d $s5, $a2, $s2
- add.d $a6, $a3, $s2
- add.d $t1, $ra, $s2
- add.d $t2, $ra, $t7
+ add.d $s3, $s8, $s2
+ add.d $s5, $s6, $s2
+ add.d $s7, $a2, $s2
+ add.d $a2, $s4, $s2
+ add.d $t1, $a6, $s2
+ add.d $t2, $a6, $t7
 addi.d $t3, $a5, 4
 sltu $t4, $s3, $a4
- sltu $t5, $s4, $t8
+ sltu $t5, $s5, $t8
 and $t4, $t4, $t5
- sltu $t5, $s3, $fp
- sltu $t6, $s5, $t8
+ sltu $t5, $s3, $a3
+ sltu $t6, $s7, $t8
 and $t5, $t5, $t6
 or $t4, $t4, $t5
- sltu $t5, $s3, $s0
- sltu $t6, $a6, $t8
+ sltu $t5, $s3, $fp
+ sltu $t6, $a2, $t8
 and $t5,
$t5, $t6 or $t4, $t4, $t5 sltu $t5, $s3, $t2 sltu $t6, $t1, $t8 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s3, $sp, 104 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill sltu $t5, $s3, $a7 sltu $t6, $t3, $t8 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s4, $fp - sltu $t6, $s5, $a4 + sltu $t5, $s5, $a3 + sltu $t6, $s7, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s4, $s0 - sltu $t6, $a6, $a4 + sltu $t5, $s5, $fp + sltu $t6, $a2, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s4, $t2 + sltu $t5, $s5, $t2 sltu $t6, $t1, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s4, $sp, 96 # 8-byte Folded Spill - sltu $t5, $s4, $a7 + st.d $s5, $sp, 104 # 8-byte Folded Spill + sltu $t5, $s5, $a7 sltu $t6, $t3, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s5, $s0 - sltu $t6, $a6, $fp + sltu $t5, $s7, $fp + sltu $t6, $a2, $a3 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s5, $t2 - sltu $t6, $t1, $fp + sltu $t5, $s7, $t2 + sltu $t6, $t1, $a3 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s5, $sp, 88 # 8-byte Folded Spill - sltu $t5, $s5, $a7 - sltu $t6, $t3, $fp + st.d $s7, $sp, 96 # 8-byte Folded Spill + sltu $t5, $s7, $a7 + sltu $t6, $t3, $a3 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $a6, $t2 - sltu $t6, $t1, $s0 + sltu $t5, $a2, $t2 + sltu $t6, $t1, $fp and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $a6, $sp, 80 # 8-byte Folded Spill - sltu $t5, $a6, $a7 - sltu $t6, $t3, $s0 + st.d $a2, $sp, 88 # 8-byte Folded Spill + sltu $t5, $a2, $a7 + sltu $t6, $t3, $fp and $t5, $t5, $t6 - add.d $t6, $s7, $a0 + add.d $t6, $s8, $a0 or $t4, $t4, $t5 - add.d $s3, $a5, $a0 + add.d $s7, $a5, $a0 sltu $t2, $t3, $t2 - addu16i.d $t3, $ra, 128 + addu16i.d $t3, $a6, 128 sltu $t1, $t1, $a7 and $t1, $t1, $t2 or $t1, $t4, $t1 - sltu $t2, $s3, $t3 - sltu $t4, $s8, $a7 + sltu $t2, $s7, $t3 + st.d $s1, $sp, 168 # 8-byte Folded Spill + sltu $t4, $s1, $a7 and $t2, $t2, $t4 - addu16i.d $t4, $s7, 128 + addu16i.d $t4, $s8, 128 addi.d $t4, $t4, -4 - sltu $t4, $s3, $t4 + sltu $t4, $s7, $t4 + st.d $t6, $sp, 216 # 8-byte Folded Spill sltu $t5, $t6, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 addu16i.d $t4, $s6, 128 addi.d $t4, $t4, -4 - sltu $t4, $s3, $t4 + sltu $t4, $s7, $t4 add.d $t5, $s6, $a0 st.d $t5, $sp, 208 # 8-byte Folded Spill sltu $t5, $t5, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 - addu16i.d $t4, $a2, 128 + addu16i.d $t4, $ra, 128 addi.d $t4, $t4, -4 - sltu $t4, $s3, $t4 - add.d $t5, $a2, $a0 + sltu $t4, $s7, $t4 + add.d $t5, $ra, $a0 st.d $t5, $sp, 200 # 8-byte Folded Spill sltu $t5, $t5, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 - addu16i.d $t4, $a3, 128 + addu16i.d $t4, $s4, 128 addi.d $t4, $t4, -4 - st.d $s3, $sp, 72 # 8-byte Folded Spill - sltu $t4, $s3, $t4 - add.d $t5, $a3, $a0 + sltu $t4, $s7, $t4 + add.d $t5, $s4, $a0 + st.d $t5, $sp, 192 # 8-byte Folded Spill sltu $a7, $t5, $a7 and $a7, $t4, $a7 or $a7, $t2, $a7 - st.d $a7, $sp, 64 # 8-byte Folded Spill + st.d $a7, $sp, 72 # 8-byte Folded Spill addi.d $a7, $t3, -4 add.d $t3, $a5, $t7 st.d $a7, $sp, 224 # 8-byte Folded Spill sltu $a7, $a5, $a7 - sltu $t2, $ra, $t3 + sltu $t2, $a6, $t3 and $a7, $a7, $t2 sltu $t4, $a5, $t8 - sltu $t2, $s7, $t3 + sltu $t2, $s8, $t3 and $t2, $t4, $t2 or $t2, $a7, $t2 sltu $a4, $a5, $a4 sltu $a7, $s6, $t3 and $a4, $a4, $a7 or $a4, $t2, $a4 - sltu $t2, $a5, $fp - sltu $a7, $a2, $t3 - and $a7, $t2, $a7 + sltu $a3, $a5, $a3 + sltu $a7, $ra, $t3 + and $a3, $a3, $a7 add.d $a1, $a5, $a1 st.d $a1, $sp, 232 # 8-byte Folded Spill - or $a1, $a4, $a7 - sltu $a7, $a5, $s0 - sltu $a4, $a3, $t3 - and $a4, $a7, $a4 - or $a1, $a1, $a4 
- st.d $a1, $sp, 256 # 8-byte Folded Spill - st.d $s2, $sp, 112 # 8-byte Folded Spill + or $a1, $a4, $a3 + sltu $a4, $a5, $fp + sltu $a3, $s4, $t3 + and $a3, $a4, $a3 + or $a1, $a1, $a3 + st.d $a1, $sp, 248 # 8-byte Folded Spill + st.d $s2, $sp, 120 # 8-byte Folded Spill add.d $a1, $a5, $s2 - st.d $a1, $sp, 56 # 8-byte Folded Spill - add.d $a1, $ra, $t0 + st.d $a1, $sp, 64 # 8-byte Folded Spill + add.d $a1, $a6, $t0 st.d $a1, $sp, 184 # 8-byte Folded Spill ori $a0, $a0, 4 - add.d $a0, $ra, $a0 - st.d $a0, $sp, 48 # 8-byte Folded Spill + add.d $a0, $a6, $a0 + st.d $a0, $sp, 56 # 8-byte Folded Spill movgr2fr.w $fa0, $zero vldi $vr1, -928 vldi $vr2, -944 vldi $vr3, -912 vldi $vr4, -1168 andi $a0, $t1, 1 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill vldi $vr5, -960 lu12i.w $a0, 65536 - lu52i.d $a0, $a0, 1023 - st.d $a0, $sp, 240 # 8-byte Folded Spill - add.d $a0, $a3, $t0 + lu52i.d $s1, $a0, 1023 + add.d $a0, $s4, $t0 st.d $a0, $sp, 40 # 8-byte Folded Spill - add.d $a0, $s7, $t0 + add.d $a0, $s8, $t0 st.d $a0, $sp, 32 # 8-byte Folded Spill add.d $a0, $a5, $t0 st.d $a0, $sp, 24 # 8-byte Folded Spill add.d $a0, $s6, $t0 st.d $a0, $sp, 16 # 8-byte Folded Spill - add.d $a0, $a2, $t0 + add.d $a0, $ra, $t0 st.d $a0, $sp, 8 # 8-byte Folded Spill - ori $t3, $zero, 512 - ori $t7, $zero, 128 - ori $t8, $zero, 4092 + ori $a2, $zero, 512 + ori $t5, $zero, 128 + ori $t6, $zero, 4092 lu12i.w $t2, -1 ori $t4, $zero, 4064 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $a5, $sp, 160 # 8-byte Folded Spill - st.d $a3, $sp, 152 # 8-byte Folded Spill - st.d $a2, $sp, 144 # 8-byte Folded Spill + ori $s3, $zero, 4060 + lu52i.d $s5, $zero, -1029 + st.d $a6, $sp, 160 # 8-byte Folded Spill + st.d $a5, $sp, 152 # 8-byte Folded Spill + st.d $s4, $sp, 144 # 8-byte Folded Spill st.d $s6, $sp, 136 # 8-byte Folded Spill - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 216 # 8-byte Folded Spill - st.d $t5, $sp, 192 # 8-byte Folded Spill + st.d $s8, $sp, 128 # 8-byte Folded Spill + st.d $s7, $sp, 80 # 8-byte Folded Spill + st.d $ra, $sp, 48 # 8-byte Folded Spill .p2align 4, , 16 .LBB0_1: # %.preheader624 # =>This Loop Header: Depth=1 @@ -237,32 +239,34 @@ srad_kernel: # @srad_kernel move $a1, $a5 fmov.s $fa7, $fa0 fmov.s $fa6, $fa0 + ld.d $a4, $sp, 216 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload ld.d $t1, $sp, 200 # 8-byte Folded Reload + ld.d $t3, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_2: # %.preheader622 # Parent Loop BB0_1 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_3 Depth 3 - move $a4, $zero + move $a3, $zero .p2align 4, , 16 .LBB0_3: # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_2 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.s $ft0, $a1, $a4 + fldx.s $ft0, $a1, $a3 fadd.s $fa7, $fa7, $ft0 fmul.s $ft0, $ft0, $ft0 - addi.d $a4, $a4, 4 + addi.d $a3, $a3, 4 fadd.s $fa6, $fa6, $ft0 - bne $a4, $t3, .LBB0_3 + bne $a3, $a2, .LBB0_3 # %bb.4: # in Loop: Header=BB0_2 Depth=2 addi.d $a0, $a0, 1 add.d $a1, $a1, $t0 - bne $a0, $t7, .LBB0_2 + bne $a0, $t5, .LBB0_2 # %bb.5: # in Loop: Header=BB0_1 Depth=1 fld.s $ft0, $a5, 0 fsub.s $ft1, $ft0, $ft0 - fst.s $ft1, $s7, 0 + fst.s $ft1, $s8, 0 fldx.s $ft1, $a5, $t0 pcalau12i $a0, %pc_hi20(.LCPI0_0) fld.s $ft2, $a0, %pc_lo12(.LCPI0_0) @@ -271,17 +275,17 @@ srad_kernel: # @srad_kernel fld.s $ft1, $a5, 0 fmul.s $fa7, $fa7, $ft2 fsub.s $ft1, $ft1, $ft0 - fst.s $ft1, $a2, 0 + fst.s $ft1, $ra, 0 fld.s $ft1, $a5, 4 fmul.s $fa6, $fa6, $ft2 fmul.s 
$fa7, $fa7, $fa7 fsub.s $fa6, $fa6, $fa7 fsub.s $ft1, $ft1, $ft0 - fst.s $ft1, $a3, 0 - fld.s $ft2, $s7, 0 + fst.s $ft1, $s4, 0 + fld.s $ft2, $s8, 0 fld.s $ft3, $s6, 0 fdiv.s $fa6, $fa6, $fa7 - fld.s $fa7, $a2, 0 + fld.s $fa7, $ra, 0 fmul.s $ft4, $ft2, $ft2 fmul.s $ft5, $ft3, $ft3 fadd.s $ft4, $ft4, $ft5 @@ -320,39 +324,40 @@ srad_kernel: # @srad_kernel fld.d $ft1, $a0, %pc_lo12(.LCPI0_2) frecip.d $ft2, $ft2 fcvt.s.d $ft3, $ft2 - fst.s $ft3, $ra, 0 + fst.s $ft3, $a6, 0 fcmp.clt.d $fcc0, $ft2, $ft1 - pcalau12i $s5, %pc_hi20(.LCPI0_3) + pcalau12i $s7, %pc_hi20(.LCPI0_3) fmov.s $ft3, $fa0 + ld.d $a0, $sp, 168 # 8-byte Folded Reload bcnez $fcc0, .LBB0_8 # %bb.6: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s5, %pc_lo12(.LCPI0_3) + fld.d $ft3, $s7, %pc_lo12(.LCPI0_3) fcmp.cule.d $fcc0, $ft2, $ft3 bcnez $fcc0, .LBB0_9 # %bb.7: # in Loop: Header=BB0_1 Depth=1 vldi $vr11, -1168 .LBB0_8: # %.sink.split # in Loop: Header=BB0_1 Depth=1 - fst.s $ft3, $ra, 0 + fst.s $ft3, $a6, 0 .LBB0_9: # in Loop: Header=BB0_1 Depth=1 - fldx.s $ft2, $a5, $t8 + fldx.s $ft2, $a5, $t6 fsub.s $ft3, $ft2, $ft2 - fstx.s $ft3, $s7, $t8 - ori $a0, $t0, 4092 - move $a6, $a0 - fldx.s $ft3, $a5, $a0 + fstx.s $ft3, $s8, $t6 + ori $a1, $t0, 4092 + st.d $a1, $sp, 256 # 8-byte Folded Spill + fldx.s $ft3, $a5, $a1 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $s6, $t8 - ori $a0, $zero, 4088 - fldx.s $ft3, $a5, $a0 + fstx.s $ft3, $s6, $t6 + ori $a1, $zero, 4088 + fldx.s $ft3, $a5, $a1 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a2, $t8 - fldx.s $ft3, $a5, $t8 + fstx.s $ft3, $ra, $t6 + fldx.s $ft3, $a5, $t6 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a3, $t8 - fldx.s $ft4, $s7, $t8 - fldx.s $ft5, $s6, $t8 - fldx.s $ft6, $a2, $t8 + fstx.s $ft3, $s4, $t6 + fldx.s $ft4, $s8, $t6 + fldx.s $ft5, $s6, $t6 + fldx.s $ft6, $ra, $t6 fmul.s $ft7, $ft4, $ft4 fmul.s $ft8, $ft5, $ft5 fadd.s $ft7, $ft7, $ft8 @@ -386,35 +391,35 @@ srad_kernel: # @srad_kernel frecip.d $ft2, $ft2 fcvt.s.d $ft3, $ft2 fcmp.clt.d $fcc0, $ft2, $ft1 - fstx.s $ft3, $ra, $t8 + fstx.s $ft3, $a6, $t6 fmov.s $ft3, $fa0 bcnez $fcc0, .LBB0_12 # %bb.10: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s5, %pc_lo12(.LCPI0_3) + fld.d $ft3, $s7, %pc_lo12(.LCPI0_3) fcmp.cule.d $fcc0, $ft2, $ft3 bcnez $fcc0, .LBB0_13 # %bb.11: # in Loop: Header=BB0_1 Depth=1 vldi $vr11, -1168 .LBB0_12: # %.sink.split663 # in Loop: Header=BB0_1 Depth=1 - fstx.s $ft3, $ra, $t8 + fstx.s $ft3, $a6, $t6 .LBB0_13: # in Loop: Header=BB0_1 Depth=1 - ld.d $a1, $sp, 232 # 8-byte Folded Reload - fldx.s $ft2, $a1, $t0 - fld.s $ft3, $a1, 0 + ld.d $a3, $sp, 232 # 8-byte Folded Reload + fldx.s $ft2, $a3, $t0 + fld.s $ft3, $a3, 0 fsub.s $ft3, $ft3, $ft2 - fst.s $ft3, $t6, 0 - fldx.s $ft3, $a1, $t0 + fst.s $ft3, $a4, 0 + fldx.s $ft3, $a3, $t0 fsub.s $ft3, $ft3, $ft2 fst.s $ft3, $a7, 0 - fldx.s $ft3, $a1, $t0 + fldx.s $ft3, $a3, $t0 fsub.s $ft3, $ft3, $ft2 fst.s $ft3, $t1, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - fldx.s $ft3, $a1, $a0 + ld.d $a1, $sp, 120 # 8-byte Folded Reload + fldx.s $ft3, $a3, $a1 fsub.s $ft3, $ft3, $ft2 - fst.s $ft3, $t5, 0 - fld.s $ft4, $t6, 0 + fst.s $ft3, $t3, 0 + fld.s $ft4, $a4, 0 fld.s $ft5, $a7, 0 fld.s $ft6, $t1, 0 fmul.s $ft7, $ft4, $ft4 @@ -450,37 +455,38 @@ srad_kernel: # @srad_kernel frecip.d $ft2, $ft2 fcvt.s.d $ft3, $ft2 fcmp.clt.d $fcc0, $ft2, $ft1 - fst.s $ft3, $s8, 0 + fst.s $ft3, $a0, 0 fmov.s $ft3, $fa0 bcnez $fcc0, .LBB0_16 # %bb.14: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s5, %pc_lo12(.LCPI0_3) + fld.d $ft3, $s7, %pc_lo12(.LCPI0_3) fcmp.cule.d $fcc0, $ft2, $ft3 bcnez $fcc0, 
.LBB0_17 # %bb.15: # in Loop: Header=BB0_1 Depth=1 vldi $vr11, -1168 .LBB0_16: # %.sink.split665 # in Loop: Header=BB0_1 Depth=1 - fst.s $ft3, $s8, 0 + fst.s $ft3, $a0, 0 .LBB0_17: # in Loop: Header=BB0_1 Depth=1 ld.d $a0, $sp, 232 # 8-byte Folded Reload - fldx.s $ft2, $a0, $a6 - fldx.s $ft3, $a0, $t8 + ld.d $a2, $sp, 256 # 8-byte Folded Reload + fldx.s $ft2, $a0, $a2 + fldx.s $ft3, $a0, $t6 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $t6, $t8 - fldx.s $ft3, $a0, $a6 + fstx.s $ft3, $a4, $t6 + fldx.s $ft3, $a0, $a2 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a7, $t8 - ori $a5, $t0, 4088 - fldx.s $ft3, $a0, $a5 + fstx.s $ft3, $a7, $t6 + ori $a1, $t0, 4088 + fldx.s $ft3, $a0, $a1 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $t1, $t8 - fldx.s $ft3, $a0, $a6 + fstx.s $ft3, $t1, $t6 + fldx.s $ft3, $a0, $a2 fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $t5, $t8 - fldx.s $ft4, $t6, $t8 - fldx.s $ft5, $a7, $t8 - fldx.s $ft6, $t1, $t8 + fstx.s $ft3, $t3, $t6 + fldx.s $ft4, $a4, $t6 + fldx.s $ft5, $a7, $t6 + fldx.s $ft6, $t1, $t6 fmul.s $ft7, $ft4, $ft4 fmul.s $ft8, $ft5, $ft5 fadd.s $ft7, $ft7, $ft8 @@ -522,7 +528,7 @@ srad_kernel: # @srad_kernel b .LBB0_21 .p2align 4, , 16 .LBB0_19: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s5, %pc_lo12(.LCPI0_3) + fld.d $ft3, $s7, %pc_lo12(.LCPI0_3) fcmp.cule.d $fcc0, $ft2, $ft3 bcnez $fcc0, .LBB0_22 # %bb.20: # in Loop: Header=BB0_1 Depth=1 @@ -533,83 +539,84 @@ srad_kernel: # @srad_kernel fst.s $ft2, $a0, 0 .LBB0_22: # %.preheader621.preheader # in Loop: Header=BB0_1 Depth=1 - st.d $s1, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 176 # 8-byte Folded Spill xvreplve0.w $xr10, $xr6 xvreplve0.w $xr11, $xr8 - ori $s3, $zero, 1 + ori $a0, $zero, 1 ld.d $a3, $sp, 8 # 8-byte Folded Reload - ld.d $a4, $sp, 16 # 8-byte Folded Reload - ld.d $a2, $sp, 24 # 8-byte Folded Reload - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload + ld.d $s2, $sp, 32 # 8-byte Folded Reload ld.d $t5, $sp, 40 # 8-byte Folded Reload - ld.d $s7, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 56 # 8-byte Folded Reload - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $s8, $sp, 96 # 8-byte Folded Reload - ld.d $t3, $sp, 88 # 8-byte Folded Reload - ld.d $a0, $sp, 80 # 8-byte Folded Reload - move $t1, $ra + ld.d $a4, $sp, 184 # 8-byte Folded Reload + ld.d $t7, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 112 # 8-byte Folded Reload + ld.d $t6, $sp, 104 # 8-byte Folded Reload + ld.d $t8, $sp, 96 # 8-byte Folded Reload + ld.d $fp, $sp, 88 # 8-byte Folded Reload + move $s4, $a6 b .LBB0_24 .p2align 4, , 16 .LBB0_23: # in Loop: Header=BB0_24 Depth=2 - addi.d $s3, $s3, 1 - add.d $t1, $t1, $t0 - add.d $a0, $a0, $t0 - add.d $t3, $t3, $t0 - add.d $s8, $s8, $t0 + addi.d $a0, $a0, 1 add.d $s4, $s4, $t0 add.d $fp, $fp, $t0 - add.d $s7, $s7, $t0 - add.d $t5, $t5, $t0 - add.d $a1, $a1, $t0 - add.d $a2, $a2, $t0 + add.d $t8, $t8, $t0 + add.d $t6, $t6, $t0 + add.d $s0, $s0, $t0 + add.d $t7, $t7, $t0 add.d $a4, $a4, $t0 + add.d $t5, $t5, $t0 + add.d $s2, $s2, $t0 + add.d $s6, $s6, $t0 + add.d $t1, $t1, $t0 add.d $a3, $a3, $t0 - ori $a7, $zero, 2047 - beq $s3, $a7, .LBB0_51 + move $s1, $a2 + ori $a5, $zero, 2047 + ori $s3, $zero, 4060 + lu52i.d $s5, $zero, -1029 + beq $a0, $a5, .LBB0_51 .LBB0_24: # %.preheader621 # Parent Loop BB0_1 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_27 Depth 3 # Child Loop BB0_48 Depth 3 - ori $a7, $zero, 1 - ld.d $t6, $sp, 248 # 8-byte Folded Reload - bnez $t6, .LBB0_44 + ori $a5, $zero, 1 + ld.d $a6, $sp, 240 # 
8-byte Folded Reload + bnez $a6, .LBB0_44 # %bb.25: # %vector.body814.preheader # in Loop: Header=BB0_24 Depth=2 - ori $s6, $t2, 32 - ld.d $t8, $sp, 240 # 8-byte Folded Reload - ori $s0, $zero, 4068 - ori $s1, $zero, 4060 + ori $s8, $t2, 32 + ori $a2, $zero, 4068 b .LBB0_27 .p2align 4, , 16 .LBB0_26: # %pred.store.continue837 # in Loop: Header=BB0_27 Depth=3 - addi.d $s6, $s6, 32 - beqz $s6, .LBB0_43 + addi.d $s8, $s8, 32 + beqz $s8, .LBB0_43 .LBB0_27: # %vector.body814 # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a7, $fp, $s6 - xvldx $xr12, $a7, $t4 - xvld $xr13, $a7, -32 - ori $t6, $t0, 4064 - xvldx $xr14, $a7, $t6 + add.d $a5, $t7, $s8 + xvldx $xr12, $a5, $t4 + xvld $xr13, $a5, -32 + ori $a6, $t0, 4064 + xvldx $xr14, $a5, $a6 xvfsub.s $xr13, $xr13, $xr12 - add.d $t6, $s4, $s6 - xvstx $xr13, $t6, $t4 + add.d $a6, $s0, $s8 + xvstx $xr13, $a6, $t4 xvfsub.s $xr14, $xr14, $xr12 - xvldx $xr15, $a7, $s1 - add.d $t6, $s8, $s6 - xvstx $xr14, $t6, $t4 - xvldx $xr16, $a7, $s0 + xvldx $xr15, $a5, $s3 + add.d $a6, $t6, $s8 + xvstx $xr14, $a6, $t4 + xvldx $xr16, $a5, $a2 xvfsub.s $xr15, $xr15, $xr12 - add.d $a7, $t3, $s6 - xvstx $xr15, $a7, $t4 + add.d $a5, $t8, $s8 + xvstx $xr15, $a5, $t4 xvfsub.s $xr16, $xr16, $xr12 - add.d $a7, $a0, $s6 - xvstx $xr16, $a7, $t4 + add.d $a5, $fp, $s8 + xvstx $xr16, $a5, $t4 xvfmul.s $xr17, $xr13, $xr13 xvfmul.s $xr18, $xr14, $xr14 xvfadd.s $xr17, $xr17, $xr18 @@ -646,8 +653,7 @@ srad_kernel: # @srad_kernel vreplvei.w $vr13, $vr17, 3 fcvt.d.s $ft5, $ft5 xvinsve0.d $xr14, $xr13, 3 - lu52i.d $a7, $zero, 1022 - xvreplgr2vr.d $xr13, $a7 + xvldi $xr13, -928 xvfmul.d $xr14, $xr14, $xr13 xvfmul.d $xr13, $xr15, $xr13 xvfmul.s $xr15, $xr12, $xr12 @@ -674,8 +680,7 @@ srad_kernel: # @srad_kernel vreplvei.w $vr15, $vr15, 3 fcvt.d.s $ft7, $ft7 xvinsve0.d $xr17, $xr15, 3 - lu52i.d $a7, $zero, -1029 - xvreplgr2vr.d $xr15, $a7 + xvreplgr2vr.d $xr15, $s5 xvfmul.d $xr16, $xr17, $xr15 xvfmul.d $xr15, $xr18, $xr15 xvfadd.d $xr15, $xr13, $xr15 @@ -726,12 +731,10 @@ srad_kernel: # @srad_kernel vreplvei.w $vr12, $vr12, 3 fcvt.d.s $ft4, $ft4 xvinsve0.d $xr15, $xr12, 3 - lu52i.d $a7, $zero, 1021 - xvreplgr2vr.d $xr12, $a7 + xvldi $xr12, -944 xvfmul.d $xr14, $xr15, $xr12 xvfmul.d $xr15, $xr16, $xr12 - lu52i.d $a7, $zero, 1023 - xvreplgr2vr.d $xr12, $a7 + xvldi $xr12, -912 xvfadd.d $xr15, $xr15, $xr12 xvfadd.d $xr14, $xr14, $xr12 xvpickve.d $xr16, $xr14, 1 @@ -811,214 +814,215 @@ srad_kernel: # @srad_kernel xvpickve.d $xr14, $xr12, 3 fcvt.s.d $ft6, $ft6 xvinsve0.w $xr15, $xr14, 7 - add.d $s2, $t1, $s6 - ori $a7, $t0, 4068 - xvstx $xr15, $s2, $a7 - lu52i.d $t6, $zero, -1175 - xvreplgr2vr.d $xr14, $t6 + add.d $t3, $s4, $s8 + ori $a5, $t0, 4068 + xvstx $xr15, $t3, $a5 + lu52i.d $a6, $zero, -1175 + xvreplgr2vr.d $xr14, $a6 xvfcmp.clt.d $xr15, $xr13, $xr14 - xvpickve2gr.d $t6, $xr15, 0 - xvinsgr2vr.w $xr16, $t6, 0 - xvpickve2gr.d $t6, $xr15, 1 - xvinsgr2vr.w $xr16, $t6, 1 - xvpickve2gr.d $t6, $xr15, 2 - xvinsgr2vr.w $xr16, $t6, 2 - xvpickve2gr.d $t6, $xr15, 3 - xvinsgr2vr.w $xr16, $t6, 3 + xvpickve2gr.d $a6, $xr15, 0 + xvinsgr2vr.w $xr16, $a6, 0 + xvpickve2gr.d $a6, $xr15, 1 + xvinsgr2vr.w $xr16, $a6, 1 + xvpickve2gr.d $a6, $xr15, 2 + xvinsgr2vr.w $xr16, $a6, 2 + xvpickve2gr.d $a6, $xr15, 3 + xvinsgr2vr.w $xr16, $a6, 3 xvfcmp.clt.d $xr14, $xr12, $xr14 - xvpickve2gr.d $t6, $xr14, 0 - xvinsgr2vr.w $xr16, $t6, 4 - xvpickve2gr.d $t6, $xr14, 1 - xvinsgr2vr.w $xr16, $t6, 5 - xvpickve2gr.d $t6, $xr14, 2 - xvinsgr2vr.w $xr16, $t6, 6 - xvpickve2gr.d 
$t6, $xr14, 3 - xvinsgr2vr.w $xr16, $t6, 7 - xvreplgr2vr.d $xr14, $t8 + xvpickve2gr.d $a6, $xr14, 0 + xvinsgr2vr.w $xr16, $a6, 4 + xvpickve2gr.d $a6, $xr14, 1 + xvinsgr2vr.w $xr16, $a6, 5 + xvpickve2gr.d $a6, $xr14, 2 + xvinsgr2vr.w $xr16, $a6, 6 + xvpickve2gr.d $a6, $xr14, 3 + xvinsgr2vr.w $xr16, $a6, 7 + xvreplgr2vr.d $xr14, $s1 xvfcmp.clt.d $xr13, $xr14, $xr13 - xvpickve2gr.d $t6, $xr13, 0 - xvinsgr2vr.w $xr15, $t6, 0 - xvpickve2gr.d $t6, $xr13, 1 - xvinsgr2vr.w $xr15, $t6, 1 - xvpickve2gr.d $t6, $xr13, 2 - xvinsgr2vr.w $xr15, $t6, 2 - xvpickve2gr.d $t6, $xr13, 3 - xvinsgr2vr.w $xr15, $t6, 3 + xvpickve2gr.d $a6, $xr13, 0 + xvinsgr2vr.w $xr15, $a6, 0 + xvpickve2gr.d $a6, $xr13, 1 + xvinsgr2vr.w $xr15, $a6, 1 + xvpickve2gr.d $a6, $xr13, 2 + xvinsgr2vr.w $xr15, $a6, 2 + xvpickve2gr.d $a6, $xr13, 3 + xvinsgr2vr.w $xr15, $a6, 3 xvfcmp.clt.d $xr12, $xr14, $xr12 - xvpickve2gr.d $t6, $xr12, 0 - xvinsgr2vr.w $xr15, $t6, 4 - xvpickve2gr.d $t6, $xr12, 1 - xvinsgr2vr.w $xr15, $t6, 5 - xvpickve2gr.d $t6, $xr12, 2 - xvinsgr2vr.w $xr15, $t6, 6 - xvpickve2gr.d $t6, $xr12, 3 - xvinsgr2vr.w $xr15, $t6, 7 + xvpickve2gr.d $a6, $xr12, 0 + xvinsgr2vr.w $xr15, $a6, 4 + xvpickve2gr.d $a6, $xr12, 1 + xvinsgr2vr.w $xr15, $a6, 5 + xvpickve2gr.d $a6, $xr12, 2 + xvinsgr2vr.w $xr15, $a6, 6 + xvpickve2gr.d $a6, $xr12, 3 + xvinsgr2vr.w $xr15, $a6, 7 xvor.v $xr14, $xr15, $xr16 - xvpickve2gr.w $t6, $xr14, 0 - lu12i.w $t7, 260096 - xvreplgr2vr.w $xr12, $t7 - xvrepli.b $xr13, 0 - andi $t7, $t6, 1 - xvbitsel.v $xr12, $xr12, $xr13, $xr16 - beqz $t7, .LBB0_29 + xvpickve2gr.w $a6, $xr14, 0 + xvrepli.b $xr12, 0 + xvldi $xr13, -1424 + andi $a7, $a6, 1 + xvbitsel.v $xr12, $xr13, $xr12, $xr16 + beqz $a7, .LBB0_29 # %bb.28: # %pred.store.if # in Loop: Header=BB0_27 Depth=3 - add.d $a7, $s2, $a7 - xvstelm.w $xr12, $a7, 0, 0 + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 0 .LBB0_29: # %pred.store.continue # in Loop: Header=BB0_27 Depth=3 - vinsgr2vr.h $vr13, $t6, 0 - xvpickve2gr.w $a7, $xr14, 1 - vinsgr2vr.h $vr13, $a7, 1 - xvpickve2gr.w $a7, $xr14, 2 - vinsgr2vr.h $vr13, $a7, 2 - xvpickve2gr.w $a7, $xr14, 3 - vinsgr2vr.h $vr13, $a7, 3 - xvpickve2gr.w $a7, $xr14, 4 - vinsgr2vr.h $vr13, $a7, 4 - xvpickve2gr.w $a7, $xr14, 5 - vinsgr2vr.h $vr13, $a7, 5 - xvpickve2gr.w $a7, $xr14, 6 - vinsgr2vr.h $vr13, $a7, 6 - xvpickve2gr.w $a7, $xr14, 7 - vinsgr2vr.h $vr13, $a7, 7 - vpickve2gr.h $a7, $vr13, 1 - andi $a7, $a7, 1 - bnez $a7, .LBB0_36 + vinsgr2vr.h $vr13, $a6, 0 + xvpickve2gr.w $a5, $xr14, 1 + vinsgr2vr.h $vr13, $a5, 1 + xvpickve2gr.w $a5, $xr14, 2 + vinsgr2vr.h $vr13, $a5, 2 + xvpickve2gr.w $a5, $xr14, 3 + vinsgr2vr.h $vr13, $a5, 3 + xvpickve2gr.w $a5, $xr14, 4 + vinsgr2vr.h $vr13, $a5, 4 + xvpickve2gr.w $a5, $xr14, 5 + vinsgr2vr.h $vr13, $a5, 5 + xvpickve2gr.w $a5, $xr14, 6 + vinsgr2vr.h $vr13, $a5, 6 + xvpickve2gr.w $a5, $xr14, 7 + vinsgr2vr.h $vr13, $a5, 7 + vpickve2gr.h $a5, $vr13, 1 + andi $a5, $a5, 1 + bnez $a5, .LBB0_36 # %bb.30: # %pred.store.continue825 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a7, $vr13, 2 - andi $a7, $a7, 1 - bnez $a7, .LBB0_37 + vpickve2gr.h $a5, $vr13, 2 + andi $a5, $a5, 1 + bnez $a5, .LBB0_37 .LBB0_31: # %pred.store.continue827 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a7, $vr13, 3 - andi $a7, $a7, 1 - bnez $a7, .LBB0_38 + vpickve2gr.h $a5, $vr13, 3 + andi $a5, $a5, 1 + bnez $a5, .LBB0_38 .LBB0_32: # %pred.store.continue829 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a7, $vr13, 4 - andi $a7, $a7, 1 - bnez $a7, .LBB0_39 + vpickve2gr.h $a5, $vr13, 4 + andi $a5, $a5, 1 + bnez $a5, 
.LBB0_39 .LBB0_33: # %pred.store.continue831 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a7, $vr13, 5 - andi $a7, $a7, 1 - bnez $a7, .LBB0_40 + vpickve2gr.h $a5, $vr13, 5 + andi $a5, $a5, 1 + bnez $a5, .LBB0_40 .LBB0_34: # %pred.store.continue833 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a7, $vr13, 6 - andi $a7, $a7, 1 - bnez $a7, .LBB0_41 + vpickve2gr.h $a5, $vr13, 6 + andi $a5, $a5, 1 + bnez $a5, .LBB0_41 .LBB0_35: # %pred.store.continue835 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a7, $vr13, 7 - andi $a7, $a7, 1 - beqz $a7, .LBB0_26 + vpickve2gr.h $a5, $vr13, 7 + andi $a5, $a5, 1 + beqz $a5, .LBB0_26 b .LBB0_42 .p2align 4, , 16 .LBB0_36: # %pred.store.if824 # in Loop: Header=BB0_27 Depth=3 - ori $a7, $t0, 4072 - add.d $a7, $s2, $a7 - xvstelm.w $xr12, $a7, 0, 1 - vpickve2gr.h $a7, $vr13, 2 - andi $a7, $a7, 1 - beqz $a7, .LBB0_31 + ori $a5, $t0, 4072 + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 1 + vpickve2gr.h $a5, $vr13, 2 + andi $a5, $a5, 1 + beqz $a5, .LBB0_31 .LBB0_37: # %pred.store.if826 # in Loop: Header=BB0_27 Depth=3 - ori $a7, $t0, 4076 - add.d $a7, $s2, $a7 - xvstelm.w $xr12, $a7, 0, 2 - vpickve2gr.h $a7, $vr13, 3 - andi $a7, $a7, 1 - beqz $a7, .LBB0_32 + ori $a5, $t0, 4076 + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 2 + vpickve2gr.h $a5, $vr13, 3 + andi $a5, $a5, 1 + beqz $a5, .LBB0_32 .LBB0_38: # %pred.store.if828 # in Loop: Header=BB0_27 Depth=3 - ori $a7, $t0, 4080 - add.d $a7, $s2, $a7 - xvstelm.w $xr12, $a7, 0, 3 - vpickve2gr.h $a7, $vr13, 4 - andi $a7, $a7, 1 - beqz $a7, .LBB0_33 + ori $a5, $t0, 4080 + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 3 + vpickve2gr.h $a5, $vr13, 4 + andi $a5, $a5, 1 + beqz $a5, .LBB0_33 .LBB0_39: # %pred.store.if830 # in Loop: Header=BB0_27 Depth=3 - ori $a7, $t0, 4084 - add.d $a7, $s2, $a7 - xvstelm.w $xr12, $a7, 0, 4 - vpickve2gr.h $a7, $vr13, 5 - andi $a7, $a7, 1 - beqz $a7, .LBB0_34 + ori $a5, $t0, 4084 + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 4 + vpickve2gr.h $a5, $vr13, 5 + andi $a5, $a5, 1 + beqz $a5, .LBB0_34 .LBB0_40: # %pred.store.if832 # in Loop: Header=BB0_27 Depth=3 - add.d $a7, $s2, $a5 - xvstelm.w $xr12, $a7, 0, 5 - vpickve2gr.h $a7, $vr13, 6 - andi $a7, $a7, 1 - beqz $a7, .LBB0_35 + add.d $a5, $t3, $a1 + xvstelm.w $xr12, $a5, 0, 5 + vpickve2gr.h $a5, $vr13, 6 + andi $a5, $a5, 1 + beqz $a5, .LBB0_35 .LBB0_41: # %pred.store.if834 # in Loop: Header=BB0_27 Depth=3 - add.d $a7, $s2, $a6 - xvstelm.w $xr12, $a7, 0, 6 - vpickve2gr.h $a7, $vr13, 7 - andi $a7, $a7, 1 - beqz $a7, .LBB0_26 + ld.d $a5, $sp, 256 # 8-byte Folded Reload + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 6 + vpickve2gr.h $a5, $vr13, 7 + andi $a5, $a5, 1 + beqz $a5, .LBB0_26 .LBB0_42: # %pred.store.if836 # in Loop: Header=BB0_27 Depth=3 - lu12i.w $a7, 2 - add.d $a7, $s2, $a7 - xvstelm.w $xr12, $a7, 0, 7 + lu12i.w $a5, 2 + add.d $a5, $t3, $a5 + xvstelm.w $xr12, $a5, 0, 7 b .LBB0_26 .p2align 4, , 16 .LBB0_43: # in Loop: Header=BB0_24 Depth=2 - ori $a7, $zero, 1017 + ori $a5, $zero, 1017 .LBB0_44: # %scalar.ph810.preheader # in Loop: Header=BB0_24 Depth=2 - slli.d $s2, $a7, 2 - ori $s6, $zero, 4092 - move $s1, $a3 + move $a2, $s1 + slli.d $t3, $a5, 2 + ori $s8, $zero, 4092 + move $a6, $a3 + move $s5, $t1 + move $a5, $s6 + move $s3, $s2 + move $s1, $t5 move $a7, $a4 - move $t7, $a2 - move $s0, $a1 - move $t6, $t5 - move $t8, $s7 b .LBB0_48 .p2align 4, , 16 .LBB0_45: # in Loop: Header=BB0_48 Depth=3 movgr2fr.w $ft4, $zero .LBB0_46: # %.sink.split669 # in Loop: Header=BB0_48 Depth=3 - fstx.s $ft4, $t8, $s2 + fstx.s $ft4, $a7, $t3 
.LBB0_47: # in Loop: Header=BB0_48 Depth=3 - addi.d $s6, $s6, -4 - addi.d $t8, $t8, 4 - addi.d $t6, $t6, 4 - addi.d $s0, $s0, 4 - addi.d $t7, $t7, 4 + addi.d $s8, $s8, -4 addi.d $a7, $a7, 4 addi.d $s1, $s1, 4 - beq $s2, $s6, .LBB0_23 + addi.d $s3, $s3, 4 + addi.d $a5, $a5, 4 + addi.d $s5, $s5, 4 + addi.d $a6, $a6, 4 + beq $t3, $s8, .LBB0_23 .LBB0_48: # %scalar.ph810 # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $ra, $t7, $s2 - fldx.s $ft4, $t7, $s2 + add.d $ra, $a5, $t3 + fldx.s $ft4, $a5, $t3 fldx.s $ft5, $ra, $t2 fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $s0, $s2 + fstx.s $ft5, $s3, $t3 fldx.s $ft5, $ra, $t0 fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $a7, $s2 + fstx.s $ft5, $s5, $t3 fld.s $ft5, $ra, -4 fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $s1, $s2 + fstx.s $ft5, $a6, $t3 fld.s $ft5, $ra, 4 fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $t6, $s2 - fldx.s $ft6, $s0, $s2 - fldx.s $ft7, $a7, $s2 - fldx.s $ft8, $s1, $s2 + fstx.s $ft5, $s1, $t3 + fldx.s $ft6, $s3, $t3 + fldx.s $ft7, $s5, $t3 + fldx.s $ft8, $a6, $t3 fmul.s $ft9, $ft6, $ft6 fmul.s $ft10, $ft7, $ft7 fadd.s $ft9, $ft9, $ft10 @@ -1052,10 +1056,10 @@ srad_kernel: # @srad_kernel frecip.d $ft4, $ft4 fcvt.s.d $ft5, $ft4 fcmp.clt.d $fcc0, $ft4, $ft1 - fstx.s $ft5, $t8, $s2 + fstx.s $ft5, $a7, $t3 bcnez $fcc0, .LBB0_45 # %bb.49: # in Loop: Header=BB0_48 Depth=3 - fld.d $ft5, $s5, %pc_lo12(.LCPI0_3) + fld.d $ft5, $s7, %pc_lo12(.LCPI0_3) fcmp.cule.d $fcc0, $ft4, $ft5 bcnez $fcc0, .LBB0_47 # %bb.50: # in Loop: Header=BB0_48 Depth=3 @@ -1064,61 +1068,56 @@ srad_kernel: # @srad_kernel .p2align 4, , 16 .LBB0_51: # %vector.memcheck698 # in Loop: Header=BB0_1 Depth=1 - lu52i.d $a0, $zero, 1020 - xvreplgr2vr.d $xr6, $a0 - ld.d $t8, $sp, 216 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $a0, $sp, 64 # 8-byte Folded Reload + xvldi $xr6, -960 + ld.d $t7, $sp, 216 # 8-byte Folded Reload + ld.d $t8, $sp, 208 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload + ld.d $s0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload beqz $a0, .LBB0_53 # %bb.52: # in Loop: Header=BB0_1 Depth=1 move $a1, $zero - ld.d $ra, $sp, 168 # 8-byte Folded Reload - ld.d $a5, $sp, 160 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a6, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 152 # 8-byte Folded Reload + ld.d $s4, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 48 # 8-byte Folded Reload ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $t7, $sp, 72 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ori $s2, $zero, 4092 + ld.d $s8, $sp, 128 # 8-byte Folded Reload + ld.d $s7, $sp, 80 # 8-byte Folded Reload b .LBB0_56 .p2align 4, , 16 .LBB0_53: # %vector.body726.preheader # in Loop: Header=BB0_1 Depth=1 ori $a0, $t2, 32 - ld.d $ra, $sp, 168 # 8-byte Folded Reload - ld.d $a5, $sp, 160 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a6, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 152 # 8-byte Folded Reload + ld.d $s4, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 48 # 8-byte Folded Reload ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $t7, $sp, 72 # 8-byte Folded Reload - ld.d $s0, 
$sp, 200 # 8-byte Folded Reload - ori $s2, $zero, 4092 + ld.d $s8, $sp, 128 # 8-byte Folded Reload + ld.d $a3, $sp, 168 # 8-byte Folded Reload + ld.d $s7, $sp, 80 # 8-byte Folded Reload ori $a4, $zero, 4068 .p2align 4, , 16 .LBB0_54: # %vector.body726 # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a1, $s8, $a0 + add.d $a1, $a3, $a0 xvldx $xr7, $a1, $t4 xvldx $xr8, $a1, $a4 - add.d $a1, $t8, $a0 + add.d $a1, $t7, $a0 xvldx $xr9, $a1, $t4 - add.d $a1, $fp, $a0 + add.d $a1, $t8, $a0 xvldx $xr10, $a1, $t4 - add.d $a1, $s0, $a0 + add.d $a1, $fp, $a0 xvldx $xr11, $a1, $t4 xvfmul.s $xr9, $xr7, $xr9 xvfmul.s $xr10, $xr7, $xr10 xvfadd.s $xr9, $xr9, $xr10 xvfmul.s $xr7, $xr7, $xr11 - add.d $a1, $s1, $a0 + add.d $a1, $s0, $a0 xvldx $xr10, $a1, $t4 - add.d $a1, $t7, $a0 + add.d $a1, $s7, $a0 xvldx $xr11, $a1, $t4 xvfadd.s $xr7, $xr9, $xr7 xvfmul.s $xr8, $xr8, $xr10 @@ -1205,11 +1204,12 @@ srad_kernel: # @srad_kernel # in Loop: Header=BB0_1 Depth=1 slli.d $a0, $a1, 2 addi.d $a1, $a1, -1023 - move $t6, $s1 - move $a4, $s0 - move $t5, $fp - move $t3, $t8 - ld.d $a7, $sp, 48 # 8-byte Folded Reload + move $t6, $s7 + move $a3, $s0 + move $a4, $fp + move $t5, $t8 + move $t3, $t7 + ld.d $a7, $sp, 56 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_57: # %.preheader623 # Parent Loop BB0_1 Depth=1 @@ -1223,9 +1223,9 @@ srad_kernel: # @srad_kernel fldx.s $ft3, $a4, $a0 fmul.s $ft1, $fa7, $ft1 fadd.s $ft0, $ft0, $ft1 - fldx.s $ft1, $t6, $a0 + fldx.s $ft1, $a3, $a0 fmul.s $fa7, $fa7, $ft3 - fldx.s $ft3, $t7, $a0 + fldx.s $ft3, $t6, $a0 fadd.s $fa7, $ft0, $fa7 fmul.s $ft0, $ft2, $ft1 fadd.s $fa7, $fa7, $ft0 @@ -1234,29 +1234,31 @@ srad_kernel: # @srad_kernel fmul.d $fa7, $fa7, $fa5 fadd.d $fa7, $fa7, $ft0 fcvt.s.d $fa7, $fa7 - fstx.s $fa7, $t7, $a0 + fstx.s $fa7, $t6, $a0 addi.d $a7, $a7, 4 addi.d $t3, $t3, 4 addi.d $t5, $t5, 4 addi.d $a4, $a4, 4 - addi.d $t6, $t6, 4 + addi.d $a3, $a3, 4 addi.d $a1, $a1, 1 - addi.d $t7, $t7, 4 + addi.d $t6, $t6, 4 bnez $a1, .LBB0_57 # %bb.58: # in Loop: Header=BB0_1 Depth=1 ld.d $a0, $sp, 224 # 8-byte Folded Reload fld.s $fa7, $a0, 0 - fldx.s $ft0, $t8, $s2 - fldx.s $ft1, $fp, $s2 + ori $a1, $zero, 4092 + fldx.s $ft0, $t7, $a1 + fldx.s $ft1, $t8, $a1 move $a0, $zero fmul.s $ft0, $fa7, $ft0 - fldx.s $ft2, $s0, $s2 + fldx.s $ft2, $fp, $a1 fmul.s $ft1, $fa7, $ft1 fadd.s $ft0, $ft0, $ft1 - fldx.s $ft1, $s1, $s2 + fldx.s $ft1, $s0, $a1 fmul.s $ft2, $fa7, $ft2 ld.d $a1, $sp, 232 # 8-byte Folded Reload - fldx.s $ft3, $a1, $a6 + ld.d $a3, $sp, 256 # 8-byte Folded Reload + fldx.s $ft3, $a1, $a3 fadd.s $ft0, $ft0, $ft2 fmul.s $fa7, $fa7, $ft1 fadd.s $fa7, $ft0, $fa7 @@ -1265,52 +1267,52 @@ srad_kernel: # @srad_kernel fmul.d $fa7, $fa7, $fa5 fadd.d $fa7, $fa7, $ft0 fcvt.s.d $fa7, $fa7 - fstx.s $fa7, $a1, $a6 + fstx.s $fa7, $a1, $a3 + move $a1, $a6 + move $s5, $s8 + move $a4, $s6 move $s3, $ra - move $s2, $s7 - move $a7, $s6 - move $t1, $a2 - move $t3, $a3 - move $t6, $a5 - ld.d $t7, $sp, 184 # 8-byte Folded Reload + move $s2, $s4 + move $a7, $a5 + ld.d $t6, $sp, 184 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_59: # %.preheader # Parent Loop BB0_1 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_62 Depth 3 # Child Loop BB0_65 Depth 3 - ld.d $a1, $sp, 256 # 8-byte Folded Reload - beqz $a1, .LBB0_61 + ld.d $a3, $sp, 248 # 8-byte Folded Reload + beqz $a3, .LBB0_61 # %bb.60: # in Loop: Header=BB0_59 Depth=2 - move $a4, $zero + move $t1, $zero b .LBB0_64 .p2align 4, , 16 .LBB0_61: # %vector.body.preheader # in Loop: Header=BB0_59 Depth=2 - ori $t8, $t2, 32 + ori 
$t7, $t2, 32 .p2align 4, , 16 .LBB0_62: # %vector.body # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_59 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a1, $t7, $t8 - xvld $xr7, $a1, -32 - xvldx $xr8, $a1, $t4 - xvld $xr9, $a1, -28 - add.d $a1, $s2, $t8 - xvldx $xr10, $a1, $t4 - add.d $a1, $a7, $t8 - xvldx $xr11, $a1, $t4 - add.d $a1, $t1, $t8 - xvldx $xr12, $a1, $t4 + add.d $a3, $t6, $t7 + xvld $xr7, $a3, -32 + xvldx $xr8, $a3, $t4 + xvld $xr9, $a3, -28 + add.d $a3, $s5, $t7 + xvldx $xr10, $a3, $t4 + add.d $a3, $a4, $t7 + xvldx $xr11, $a3, $t4 + add.d $a3, $s3, $t7 + xvldx $xr12, $a3, $t4 xvfmul.s $xr10, $xr7, $xr10 xvfmul.s $xr8, $xr8, $xr11 xvfadd.s $xr8, $xr10, $xr8 xvfmul.s $xr7, $xr7, $xr12 - add.d $a1, $t3, $t8 - xvldx $xr10, $a1, $t4 - add.d $fp, $t6, $t8 - xvldx $xr11, $fp, $t4 + add.d $a3, $s2, $t7 + xvldx $xr10, $a3, $t4 + add.d $t8, $a7, $t7 + xvldx $xr11, $t8, $t4 xvfadd.s $xr7, $xr8, $xr7 xvfmul.s $xr8, $xr9, $xr10 xvfadd.s $xr8, $xr7, $xr8 @@ -1387,39 +1389,39 @@ srad_kernel: # @srad_kernel xvpickve.d $xr7, $xr9, 3 fcvt.s.d $fa7, $fa7 xvinsve0.w $xr10, $xr7, 7 - addi.d $t8, $t8, 32 - xvstx $xr10, $fp, $t4 - bnez $t8, .LBB0_62 + addi.d $t7, $t7, 32 + xvstx $xr10, $t8, $t4 + bnez $t7, .LBB0_62 # %bb.63: # in Loop: Header=BB0_59 Depth=2 - ori $a4, $zero, 1016 + ori $t1, $zero, 1016 .LBB0_64: # %scalar.ph.preheader # in Loop: Header=BB0_59 Depth=2 - slli.d $a1, $a4, 2 - addi.d $s5, $a4, -1023 - move $s4, $t6 - move $a4, $t3 - move $t5, $t1 - move $t8, $a7 - move $fp, $s2 - move $s0, $s3 + slli.d $a3, $t1, 2 + addi.d $t1, $t1, -1023 + move $t3, $a7 + move $t5, $s2 + move $t7, $s3 + move $t8, $a4 + move $fp, $s5 + move $s0, $a1 .p2align 4, , 16 .LBB0_65: # %scalar.ph # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_59 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $s1, $s0, $a1 + add.d $s1, $s0, $a3 fldx.s $fa7, $s1, $t0 - fldx.s $ft0, $t8, $a1 - fldx.s $ft1, $s0, $a1 - fldx.s $ft2, $fp, $a1 + fldx.s $ft0, $t8, $a3 + fldx.s $ft1, $s0, $a3 + fldx.s $ft2, $fp, $a3 fld.s $ft3, $s1, 4 fmul.s $fa7, $fa7, $ft0 - fldx.s $ft0, $t5, $a1 + fldx.s $ft0, $t7, $a3 fmul.s $ft2, $ft1, $ft2 fadd.s $fa7, $ft2, $fa7 - fldx.s $ft2, $a4, $a1 + fldx.s $ft2, $t5, $a3 fmul.s $ft0, $ft1, $ft0 - fldx.s $ft1, $s4, $a1 + fldx.s $ft1, $t3, $a3 fadd.s $fa7, $fa7, $ft0 fmul.s $ft0, $ft3, $ft2 fadd.s $fa7, $fa7, $ft0 @@ -1428,36 +1430,37 @@ srad_kernel: # @srad_kernel fmul.d $fa7, $fa7, $fa5 fadd.d $fa7, $fa7, $ft0 fcvt.s.d $fa7, $fa7 - fstx.s $fa7, $s4, $a1 + fstx.s $fa7, $t3, $a3 addi.d $s0, $s0, 4 addi.d $fp, $fp, 4 addi.d $t8, $t8, 4 + addi.d $t7, $t7, 4 addi.d $t5, $t5, 4 - addi.d $a4, $a4, 4 - addi.d $s5, $s5, 1 - addi.d $s4, $s4, 4 - bnez $s5, .LBB0_65 + addi.d $t1, $t1, 1 + addi.d $t3, $t3, 4 + bnez $t1, .LBB0_65 # %bb.66: # in Loop: Header=BB0_59 Depth=2 addi.d $a0, $a0, 1 - add.d $t7, $t7, $t0 add.d $t6, $t6, $t0 - add.d $t3, $t3, $t0 - add.d $t1, $t1, $t0 add.d $a7, $a7, $t0 add.d $s2, $s2, $t0 add.d $s3, $s3, $t0 - ori $a1, $zero, 2047 - bne $a0, $a1, .LBB0_59 + add.d $a4, $a4, $t0 + add.d $s5, $s5, $t0 + add.d $a1, $a1, $t0 + ori $a3, $zero, 2047 + bne $a0, $a3, .LBB0_59 # %bb.67: # in Loop: Header=BB0_1 Depth=1 - ld.d $s1, $sp, 176 # 8-byte Folded Reload - addi.w $s1, $s1, 1 - ld.d $t6, $sp, 216 # 8-byte Folded Reload - ld.d $t5, $sp, 192 # 8-byte Folded Reload - ori $t3, $zero, 512 - ori $t7, $zero, 128 - ori $t8, $zero, 4092 + ld.d $s0, $sp, 176 # 8-byte Folded Reload + addi.w $s0, $s0, 1 + move $s1, $a2 + ori $a2, $zero, 512 + ori $t5, $zero, 128 + ori $t6, $zero, 4092 ori $a0, $zero, 
10 - bne $s1, $a0, .LBB0_1 + ori $s3, $zero, 4060 + lu52i.d $s5, $zero, -1029 + bne $s0, $a0, .LBB0_1 # %bb.68: ld.d $s8, $sp, 264 # 8-byte Folded Reload ld.d $s7, $sp, 272 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/TSVC/ControlFlow-dbl/CMakeFiles/ControlFlow-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/ControlFlow-dbl/CMakeFiles/ControlFlow-dbl.dir/tsc.s index 9fc418f4..8206197d 100644 --- a/results/MultiSource/Benchmarks/TSVC/ControlFlow-dbl/CMakeFiles/ControlFlow-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/ControlFlow-dbl/CMakeFiles/ControlFlow-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 
3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi 
$xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ 
-7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 
+ addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init 
addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 
32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - 
lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -15555,245 +15447,161 @@ set: # @set bnez $a2, .LBB29_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB29_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB29_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - 
xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB29_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB29_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB29_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 
- xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB29_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB29_17: # %.preheader34.i47 +.LBB29_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -15868,45 +15676,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB29_17 + bnez $a4, .LBB29_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB29_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + 
xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB29_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI29_0) - xvld $xr0, $a2, %pc_lo12(.LCPI29_0) - pcalau12i $a2, %pc_hi20(.LCPI29_1) - xvld $xr1, $a2, %pc_lo12(.LCPI29_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI29_0) + xvld $xr0, $a1, %pc_lo12(.LCPI29_0) + pcalau12i $a1, %pc_hi20(.LCPI29_1) + xvld $xr1, $a1, %pc_lo12(.LCPI29_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB29_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB29_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB29_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s index dfc5983e..e47f90fa 100644 --- a/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # 
%vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # 
@init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 
+3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - 
xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, 
$a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 
- lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + 
lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - 
lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop 
Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -14711,181 +14603,129 @@ set: # @set bnez $a2, .LBB29_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB29_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB29_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB29_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - 
bnez $a4, .LBB29_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB29_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB29_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB29_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB29_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB29_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB29_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori 
$a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB29_17: # %.preheader34.i47 +.LBB29_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -14928,45 +14768,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB29_17 + bnez $a4, .LBB29_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB29_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB29_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI29_0) - xvld $xr0, $a2, %pc_lo12(.LCPI29_0) - pcalau12i $a2, %pc_hi20(.LCPI29_1) - xvld $xr1, $a2, %pc_lo12(.LCPI29_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI29_0) + xvld $xr0, $a1, %pc_lo12(.LCPI29_0) + pcalau12i $a1, %pc_hi20(.LCPI29_1) + xvld $xr1, $a1, %pc_lo12(.LCPI29_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB29_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB29_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB29_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git 
a/results/MultiSource/Benchmarks/TSVC/ControlLoops-dbl/CMakeFiles/ControlLoops-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/ControlLoops-dbl/CMakeFiles/ControlLoops-dbl.dir/tsc.s index 2c8aca38..da336409 100644 --- a/results/MultiSource/Benchmarks/TSVC/ControlLoops-dbl/CMakeFiles/ControlLoops-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/ControlLoops-dbl/CMakeFiles/ControlLoops-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + 
xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop 
Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - 
lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + 
xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + 
lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop 
Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, 
-32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init 
ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -13758,245 +13650,161 @@ set: # @set bnez $a2, .LBB20_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB20_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB20_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, 
$a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB20_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB20_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB20_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst 
$xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB20_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB20_17: # %.preheader34.i47 +.LBB20_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -14071,45 +13879,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB20_17 + bnez $a4, .LBB20_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB20_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, 
$a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB20_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI20_0) - xvld $xr0, $a2, %pc_lo12(.LCPI20_0) - pcalau12i $a2, %pc_hi20(.LCPI20_1) - xvld $xr1, $a2, %pc_lo12(.LCPI20_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI20_0) + xvld $xr0, $a1, %pc_lo12(.LCPI20_0) + pcalau12i $a1, %pc_hi20(.LCPI20_1) + xvld $xr1, $a1, %pc_lo12(.LCPI20_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB20_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB20_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB20_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s index a58809ad..138a9c90 100644 --- a/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - 
xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 
xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner 
Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 
ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # 
%.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # 
@init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ 
init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init 
add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi 
$xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12929,181 +12821,129 @@ set: # @set bnez $a2, .LBB20_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB20_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB20_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB20_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB20_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - 
ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB20_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB20_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB20_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB20_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB20_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB20_17: # %.preheader34.i47 +.LBB20_15: # %.preheader34.i39 # =>This Inner Loop Header: 
Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13146,45 +12986,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB20_17 + bnez $a4, .LBB20_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB20_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB20_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI20_0) - xvld $xr0, $a2, %pc_lo12(.LCPI20_0) - pcalau12i $a2, %pc_hi20(.LCPI20_1) - xvld $xr1, $a2, %pc_lo12(.LCPI20_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI20_0) + xvld $xr0, $a1, %pc_lo12(.LCPI20_0) + pcalau12i $a1, %pc_hi20(.LCPI20_1) + xvld $xr1, $a1, %pc_lo12(.LCPI20_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB20_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB20_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB20_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s index cc5ebdcb..30b1c608 100644 
--- a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, 
$zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + 
addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # 
@init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 
3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, 
$a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d 
$a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 
+ xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # 
%vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop 
Header: Depth=1 xvst $xr0, $a0, -32 @@ -11960,8 +11852,7 @@ s1281: # @s1281 ori $s1, $a0, 2144 lu12i.w $a0, 250 ori $s2, $a0, 160 - lu52i.d $a0, $zero, -1025 - xvreplgr2vr.d $xr5, $a0 + xvldi $xr5, -784 add.d $a0, $fp, $s8 st.d $a0, $sp, 24 # 8-byte Folded Spill add.d $a0, $fp, $s0 @@ -13106,245 +12997,161 @@ set: # @set bnez $a2, .LBB15_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB15_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB15_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, 
$a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB15_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB15_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB15_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst 
$xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB15_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB15_17: # %.preheader34.i47 +.LBB15_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13419,45 +13226,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB15_17 + bnez $a4, .LBB15_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB15_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, 
$a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB15_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI15_0) - xvld $xr0, $a2, %pc_lo12(.LCPI15_0) - pcalau12i $a2, %pc_hi20(.LCPI15_1) - xvld $xr1, $a2, %pc_lo12(.LCPI15_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI15_0) + xvld $xr0, $a1, %pc_lo12(.LCPI15_0) + pcalau12i $a1, %pc_hi20(.LCPI15_1) + xvld $xr1, $a1, %pc_lo12(.LCPI15_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB15_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB15_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB15_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s index 1e7f45b5..730460a1 100644 --- a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ 
init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 
260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 
3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 
ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # 
%.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b 
.LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # 
%vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, 
$a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11122,8 +11014,7 @@ s1281: # @s1281 ori $s1, $a0, 3152 lu12i.w $a0, 125 ori $s2, $a0, 128 - lu12i.w $a0, -264192 - xvreplgr2vr.w $xr5, $a0 + xvldi $xr5, -1296 add.d $a0, $fp, $s8 st.d $a0, $sp, 24 # 8-byte Folded Spill add.d $a0, $fp, $s0 @@ -12249,181 +12140,129 @@ set: # @set bnez $a2, .LBB15_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB15_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB15_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 
4, , 16 .LBB15_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB15_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB15_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB15_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB15_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB15_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB15_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, 
$a3, 1024 - bnez $a5, .LBB15_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB15_17: # %.preheader34.i47 +.LBB15_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12466,45 +12305,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB15_17 + bnez $a4, .LBB15_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB15_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB15_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI15_0) - xvld $xr0, $a2, %pc_lo12(.LCPI15_0) - pcalau12i $a2, %pc_hi20(.LCPI15_1) - xvld $xr1, $a2, %pc_lo12(.LCPI15_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI15_0) + xvld $xr0, $a1, %pc_lo12(.LCPI15_0) + pcalau12i $a1, %pc_hi20(.LCPI15_1) + xvld $xr1, $a1, %pc_lo12(.LCPI15_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB15_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB15_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB15_19 # %bb.20: # 
%middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Equivalencing-dbl/CMakeFiles/Equivalencing-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Equivalencing-dbl/CMakeFiles/Equivalencing-dbl.dir/tsc.s index d4b055c2..0c12f567 100644 --- a/results/MultiSource/Benchmarks/TSVC/Equivalencing-dbl/CMakeFiles/Equivalencing-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Equivalencing-dbl/CMakeFiles/Equivalencing-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori 
     ori $a1, $a1, 3392
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1022
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -928
 .LBB5_217: # %.preheader.i1227
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -3316,8 +3303,7 @@ init: # @init
     ori $a1, $a1, 3488
     add.d $a0, $a0, $a1
     ori $a1, $zero, 256
-    lu52i.d $a2, $zero, 1024
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -1024
 .LBB5_219: # %.preheader.i1234
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -1024
@@ -3396,8 +3382,7 @@ init: # @init
     ori $a1, $a1, 3392
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_221: # %.preheader.i1241
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -3588,8 +3573,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_227: # %vector.body5921
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -3681,8 +3665,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1024
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -1024
 .LBB5_235: # %vector.body5903
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -3695,8 +3678,7 @@ init: # @init
     ori $a1, $a1, 96
     add.d $a1, $fp, $a1
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_237: # %vector.body5909
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -3723,8 +3705,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_241: # %vector.body5888
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -3763,8 +3744,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_245: # %.preheader.i1309
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -3889,8 +3869,7 @@ init: # @init
     addi.d $a0, $a0, %pc_lo12(array+32)
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_251: # %vector.body5850
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -3985,8 +3964,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_255: # %vector.body5835
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4024,8 +4002,7 @@ init: # @init
     addi.d $a0, $fp, 32
     lu12i.w $s0, 7
     ori $a1, $s0, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_259: # %vector.body5802
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -4114,35 +4091,35 @@ init: # @init
 .LBB5_266: # %vector.body5759.preheader
     pcalau12i $a0, %pc_hi20(global_data)
     addi.d $a0, $a0, %pc_lo12(global_data)
-    addi.d $a3, $a0, 32
+    addi.d $a2, $a0, 32
     lu12i.w $a1, 7
-    ori $a4, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    ori $a3, $a1, 3328
+    xvldi $xr0, -912
 .LBB5_267: # %vector.body5759
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB5_267
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB5_267
 # %bb.268: # %vector.body5765.preheader
-    lu12i.w $a3, 62
-    ori $a4, $a3, 2096
-    add.d $a5, $a0, $a4
-    lu12i.w $a4, 3
-    ori $a6, $a4, 3712
+    lu12i.w $a2, 62
+    ori $a3, $a2, 2096
+    add.d $a4, $a0, $a3
+    lu12i.w $a3, 3
+    ori $a5, $a3, 3712
+    lu52i.d $a6, $zero, 1023
 .LBB5_269: # %vector.body5765
         # =>This Inner Loop Header: Depth=1
-    st.d $a2, $a5, -16
-    st.d $a2, $a5, 0
-    addi.d $a6, $a6, -2
-    addi.d $a5, $a5, 32
-    bnez $a6, .LBB5_269
+    st.d $a6, $a4, -16
+    st.d $a6, $a4, 0
+    addi.d $a5, $a5, -2
+    addi.d $a4, $a4, 32
+    bnez $a5, .LBB5_269
 # %bb.270: # %vector.body5771.preheader
-    ori $a2, $a3, 2104
+    ori $a2, $a2, 2104
     add.d $a2, $a0, $a2
-    ori $a3, $a4, 3712
+    ori $a3, $a3, 3712
     lu52i.d $a4, $zero, -1025
 .LBB5_271: # %vector.body5771
         # =>This Inner Loop Header: Depth=1
@@ -4217,8 +4194,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_279: # %vector.body5735
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4280,8 +4256,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_285: # %vector.body5720
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4319,8 +4294,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_289: # %vector.body5705
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4358,8 +4332,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_293: # %vector.body5690
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4397,8 +4370,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_297: # %vector.body5675
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4436,8 +4408,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_301: # %vector.body5660
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4475,8 +4446,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_305: # %vector.body5636
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4545,8 +4515,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_311: # %vector.body5603
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -4655,8 +4624,7 @@ init: # @init
     ori $a3, $a3, 2112
     add.d $a3, $a1, $a3
     ori $a4, $a2, 3328
-    lu52i.d $a5, $zero, 1023
-    xvreplgr2vr.d $xr0, $a5
+    xvldi $xr0, -912
 .LBB5_321: # %vector.body5582
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a3, -32
@@ -4706,8 +4674,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_327: # %vector.body5540
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4799,8 +4766,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_335: # %vector.body5528
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -4828,8 +4794,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_339: # %.preheader.i1541
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -4989,8 +4954,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_343: # %.preheader.i1556
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -5328,8 +5292,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_354: # %.preheader.i1594
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -5571,8 +5534,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_360: # %vector.body5424
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -5790,8 +5752,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_370: # %vector.body5400
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -5904,8 +5865,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_386: # %vector.body5343
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5997,8 +5957,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_394: # %vector.body5325
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -6049,8 +6008,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_400: # %vector.body5292
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -6142,8 +6100,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_408: # %vector.body5280
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -6170,8 +6127,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_412: # %vector.body5253
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6246,8 +6202,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_420: # %vector.body5247
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -6269,8 +6224,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_422: # %vector.body5241
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -6285,8 +6239,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_424: # %vector.body5223
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -6299,8 +6252,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1024
-    xvreplgr2vr.d $xr1, $a3
+    xvldi $xr1, -1024
 .LBB5_426: # %.preheader.i1789
         # =>This Inner Loop Header: Depth=1
     xvst $xr1, $a1, -1024
@@ -6453,8 +6405,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_430: # %vector.body5205
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -6467,8 +6418,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1024
-    xvreplgr2vr.d $xr1, $a3
+    xvldi $xr1, -1024
 .LBB5_432: # %.preheader.i1807
         # =>This Inner Loop Header: Depth=1
     xvst $xr1, $a1, -1024
@@ -6790,8 +6740,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_444: # %vector.body5140
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6863,8 +6812,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_452: # %vector.body5116
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6926,8 +6874,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_458: # %vector.body5080
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6999,8 +6946,7 @@ init: # @init
     ori $a2, $a2, 192
     add.d $a0, $a0, $a2
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1024
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -1024
 .LBB5_466: # %vector.body5110
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -7015,8 +6961,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_468: # %vector.body5047
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7103,8 +7048,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_478: # %vector.body5017
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -7179,8 +7123,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_486: # %.preheader.i1949
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -7413,8 +7356,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_492: # %vector.body4966
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7499,8 +7441,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_500: # %vector.body4921
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7525,8 +7466,7 @@ init: # @init
     ori $a2, $a2, 3136
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, -1025
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -784
 .LBB5_504: # %vector.body4933
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7611,8 +7551,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, -1025
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -784
 .LBB5_512: # %vector.body4876
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7625,8 +7564,7 @@ init: # @init
     ori $a2, $a2, 1056
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_514: # %vector.body4882
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7723,8 +7661,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, -1025
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -784
 .LBB5_524: # %vector.body4831
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7737,8 +7674,7 @@ init: # @init
     ori $a2, $a2, 1056
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_526: # %vector.body4837
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7835,8 +7771,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_536: # %vector.body4792
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7933,8 +7868,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_546: # %vector.body4768
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7996,8 +7930,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_552: # %vector.body4744
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -8066,8 +7999,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_558: # %vector.body4732
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -8101,8 +8033,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_562: # %vector.body4726
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -8124,8 +8055,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_564: # %vector.body4720
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -8166,8 +8096,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_568: # %.preheader.i2154
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -8870,8 +8799,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_608: # %vector.body4552
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -8891,8 +8819,7 @@ init: # @init
     addi.d $a0, $s0, 32
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_611: # %vector.body4546
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -8923,8 +8850,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_614: # %vector.body4507
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -9245,8 +9171,7 @@ init: # @init
     ori $a1, $a1, 3392
     add.d $a0, $a0, $a1
     ori $a1, $zero, 256
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_639: # %.preheader.i2360
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -1024
@@ -9321,30 +9246,29 @@ init: # @init
 .LBB5_640: # %vector.body4436.preheader
     pcalau12i $a0, %pc_hi20(global_data)
     addi.d $a0, $a0, %pc_lo12(global_data)
-    addi.d $a3, $a0, 32
-    lu12i.w $a2, 7
-    ori $a4, $a2, 3328
-    lu52i.d $a1, $zero, 1023
-    xvreplgr2vr.d $xr0, $a1
+    addi.d $a2, $a0, 32
+    lu12i.w $a1, 7
+    ori $a3, $a1, 3328
+    xvldi $xr0, -912
 .LBB5_641: # %vector.body4436
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB5_641
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB5_641
 # %bb.642: # %vector.body4442.preheader
-    lu12i.w $a3, 62
-    ori $a3, $a3, 2112
-    add.d $a3, $a0, $a3
-    ori $a2, $a2, 3328
+    lu12i.w $a2, 62
+    ori $a2, $a2, 2112
+    add.d $a2, $a0, $a2
+    ori $a1, $a1, 3328
 .LBB5_643: # %vector.body4442
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a2, $a2, -8
-    addi.d $a3, $a3, 64
-    bnez $a2, .LBB5_643
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a1, $a1, -8
+    addi.d $a2, $a2, 64
+    bnez $a1, .LBB5_643
     b .LBB5_652
 .LBB5_644: # %vector.body4418.preheader
     pcalau12i $a0, %pc_hi20(.LCPI5_0)
@@ -9398,34 +9322,34 @@ init: # @init
 .LBB5_648: # %vector.body4406.preheader
     pcalau12i $a0, %pc_hi20(global_data)
     addi.d $a0, $a0, %pc_lo12(global_data)
-    addi.d $a3, $a0, 32
-    lu12i.w $a2, 7
-    ori $a4, $a2, 3328
-    lu52i.d $a1, $zero, 1023
-    xvreplgr2vr.d $xr0, $a1
+    addi.d $a2, $a0, 32
+    lu12i.w $a1, 7
+    ori $a3, $a1, 3328
+    xvldi $xr0, -912
 .LBB5_649: # %vector.body4406
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB5_649
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB5_649
 # %bb.650: # %vector.body4412.preheader
-    lu12i.w $a3, 62
-    ori $a3, $a3, 2112
-    add.d $a3, $a0, $a3
-    ori $a2, $a2, 3328
+    lu12i.w $a2, 62
+    ori $a2, $a2, 2112
+    add.d $a2, $a0, $a2
+    ori $a1, $a1, 3328
 .LBB5_651: # %vector.body4412
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a2, $a2, -8
-    addi.d $a3, $a3, 64
-    bnez $a2, .LBB5_651
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a1, $a1, -8
+    addi.d $a2, $a2, 64
+    bnez $a1, .LBB5_651
 .LBB5_652: # %set1d.exit2374
-    lu12i.w $a2, 125
-    ori $a2, $a2, 64
-    stx.d $a1, $a0, $a2
+    lu12i.w $a1, 125
+    ori $a1, $a1, 64
+    lu52i.d $a2, $zero, 1023
+    stx.d $a2, $a0, $a1
     b .LBB5_573
 .LBB5_653: # %vector.body4382.preheader
     pcalau12i $a0, %pc_hi20(global_data)
@@ -9433,8 +9357,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_654: # %vector.body4382
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -9496,8 +9419,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_660: # %vector.body4358
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -9566,8 +9488,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_666: # %vector.body4328
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -9642,8 +9563,7 @@ init: # @init
     ori $a1, $a1, 3296
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_674: # %.preheader.i2443
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -1024
@@ -9885,8 +9805,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a3, $a2, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_680: # %vector.body4282
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -9975,8 +9894,7 @@ init: # @init
     ori $a2, $a2, 2112
     add.d $a0, $a0, $a2
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
 .LBB5_689: # %vector.body4276
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -9990,8 +9908,7 @@ init: # @init
     addi.d $a0, $a0, %pc_lo12(array+32)
     lu12i.w $a1, 7
     ori $a2, $a1, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_691: # %vector.body4252
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -10058,8 +9975,7 @@ init: # @init
     addi.d $a0, $a0, %pc_lo12(array+32)
     lu12i.w $a1, 7
     ori $a2, $a1, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_697: # %vector.body4228
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -10096,8 +10012,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_701: # %vector.body4213
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10135,8 +10050,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_705: # %vector.body4198
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10174,8 +10088,7 @@ init: # @init
     addi.d $a0, $fp, 32
     lu12i.w $s0, 7
     ori $a1, $s0, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
     xvst $xr0, $sp, 16 # 32-byte Folded Spill
 .LBB5_709: # %vector.body4162
         # =>This Inner Loop Header: Depth=1
@@ -10237,8 +10150,7 @@ init: # @init
     ori $a0, $a0, 2176
     add.d $a0, $fp, $a0
     ori $a2, $s0, 3328
-    lu52i.d $a3, $zero, -1025
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -784
 .LBB5_715: # %vector.body4186
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -10272,8 +10184,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_719: # %vector.body4120
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10381,8 +10292,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_729: # %vector.body4096
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10499,23 +10409,22 @@ init: # @init
     pcaddu18i $ra, %call36(memset)
     jirl $ra, $ra, 0
     ori $a0, $s0, 2112
-    add.d $a1, $fp, $a0
-    lu12i.w $a0, 7
-    ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    add.d $a0, $fp, $a0
+    lu12i.w $a1, 7
+    ori $a2, $a1, 3328
+    xvldi $xr0, -912
 .LBB5_739: # %vector.body4066
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a1, -32
-    xvst $xr0, $a1, 0
+    xvst $xr0, $a0, -32
+    xvst $xr0, $a0, 0
     addi.d $a2, $a2, -8
-    addi.d $a1, $a1, 64
+    addi.d $a0, $a0, 64
     bnez $a2, .LBB5_739
 # %bb.740: # %vector.body4072.preheader
-    lu12i.w $a1, 125
-    ori $a1, $a1, 96
-    add.d $a1, $fp, $a1
-    ori $a0, $a0, 3328
+    lu12i.w $a0, 125
+    ori $a0, $a0, 96
+    add.d $a0, $fp, $a0
+    ori $a1, $a1, 3328
     lu12i.w $a2, -390306
     ori $a2, $a2, 3469
     lu32i.d $a2, 50935
@@ -10523,11 +10432,11 @@ init: # @init
     xvreplgr2vr.d $xr0, $a2
 .LBB5_741: # %vector.body4072
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a1, -32
-    xvst $xr0, $a1, 0
-    addi.d $a0, $a0, -8
-    addi.d $a1, $a1, 64
-    bnez $a0, .LBB5_741
+    xvst $xr0, $a0, -32
+    xvst $xr0, $a0, 0
+    addi.d $a1, $a1, -8
+    addi.d $a0, $a0, 64
+    bnez $a1, .LBB5_741
     b .LBB5_573
 .LBB5_742: # %.preheader.i2620.preheader
     pcalau12i $a0, %pc_hi20(global_data)
@@ -10567,8 +10476,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_745: # %vector.body4021
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10654,8 +10562,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_755: # %vector.body3988
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10740,8 +10647,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_763: # %vector.body3964
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10810,8 +10716,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_769: # %vector.body3940
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -10873,8 +10778,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_775: # %vector.body3925
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -10920,8 +10824,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a2, $a1, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_779: # %vector.body3910
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -10966,8 +10869,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_783: # %vector.body3886
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -11193,8 +11095,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu52i.d $a3, $zero, 1023
-    xvreplgr2vr.d $xr0, $a3
+    xvldi $xr0, -912
 .LBB5_797: # %vector.body3827
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -11256,8 +11157,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_803: # %vector.body3803
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11479,8 +11379,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_819: # %vector.body3746
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11507,8 +11406,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_823: # %vector.body3722
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11570,8 +11468,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_829: # %vector.body3707
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11632,8 +11529,7 @@ init: # @init
     ori $a2, $a2, 2112
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_835: # %vector.body3695
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11646,8 +11542,7 @@ init: # @init
     ori $a2, $a2, 96
     add.d $a0, $a0, $a2
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, -1025
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -784
 .LBB5_837: # %vector.body3701
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -11662,8 +11557,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1023
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -912
 .LBB5_839: # %vector.body3668
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11676,8 +11570,7 @@ init: # @init
     ori $a2, $a2, 2112
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu52i.d $a4, $zero, 1024
-    xvreplgr2vr.d $xr0, $a4
+    xvldi $xr0, -1024
 .LBB5_841: # %vector.body3674
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -11690,8 +11583,7 @@ init: # @init
     ori $a2, $a2, 96
     add.d $a0, $a0, $a2
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1022
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -928
 .LBB5_843: # %vector.body3680
         # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -11800,8 +11692,7 @@ s421: # @s421
     addi.d $a0, $a0, 32
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
     .p2align 4, , 16
 .LBB6_1: # %vector.body
         # =>This Inner Loop Header: Depth=1
@@ -12006,8 +11897,7 @@ s1421: # @s1421
     addi.d $a0, $a0, 32
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
     .p2align 4, , 16
 .LBB7_1: # %vector.body
         # =>This Inner Loop Header: Depth=1
@@ -12411,8 +12301,7 @@ s423: # @s423
     addi.d $a0, $s7, 544
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu52i.d $a2, $zero, 1023
-    xvreplgr2vr.d $xr0, $a2
+    xvldi $xr0, -912
     .p2align 4, , 16
 .LBB9_1: # %vector.body
         # =>This Inner Loop Header: Depth=1
@@ -12873,245 +12762,161 @@ set: # @set
     bnez $a2, .LBB12_1
 # %bb.2: # %vector.body67.preheader
     pcalau12i $a0, %pc_hi20(global_data)
-    addi.d $a2, $a0, %pc_lo12(global_data)
-    addi.d $a3, $a2, 32
-    lu12i.w $a1, 7
-    ori $a4, $a1, 3328
-    lu52i.d $a0, $zero, 1023
-    xvreplgr2vr.d $xr0, $a0
+    addi.d $a1, $a0, %pc_lo12(global_data)
+    addi.d $a2, $a1, 32
+    lu12i.w $a0, 7
+    ori $a3, $a0, 3328
+    xvldi $xr0, -912
     .p2align 4, , 16
 .LBB12_3: # %vector.body67
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB12_3
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB12_3
 # %bb.4: # %vector.body73.preheader
-    ori $a3, $s2, 2112
-    add.d $a3, $a2, $a3
-    ori $a4, $a1, 3328
+    ori $a2, $s2, 2112
+    add.d $a2, $a1, $a2
+    ori $a3, $a0, 3328
     .p2align 4, , 16
 .LBB12_5: # %vector.body73
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB12_5
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB12_5
 # %bb.6: # %vector.body79.preheader
-    lu12i.w $a3, 125
-    ori $a3, $a3, 96
-    add.d $a3, $a2, $a3
-    ori $a4, $a1, 3328
+    lu12i.w $a2, 125
+    ori $a2, $a2, 96
+    add.d $a2, $a1, $a2
+    ori $a3, $a0, 3328
     .p2align 4, , 16
 .LBB12_7: # %vector.body79
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB12_7
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB12_7
 # %bb.8: # %vector.body85.preheader
-    lu12i.w $a3, 187
-    ori $a3, $a3, 2176
-    add.d $a3, $a2, $a3
-    ori $a4, $a1, 3328
+    lu12i.w $a2, 187
+    ori $a2, $a2, 2176
+    add.d $a2, $a1, $a2
+    ori $a3, $a0, 3328
     .p2align 4, , 16
 .LBB12_9: # %vector.body85
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB12_9
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB12_9
 # %bb.10: # %vector.body91.preheader
-    lu12i.w $a3, 250
-    ori $a3, $a3, 192
-    add.d $a3, $a2, $a3
-    ori $a4, $a1, 3328
+    lu12i.w $a2, 250
+    ori $a2, $a2, 192
+    add.d $a2, $a1, $a2
+    ori $a3, $a0, 3328
     .p2align 4, , 16
 .LBB12_11: # %vector.body91
         # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -8
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB12_11
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -8
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB12_11
 # %bb.12: # %.preheader34.i.preheader
-    lu12i.w $a3, 312
-    ori $a3, $a3, 3296
-    add.d $a3, $a2, $a3
-    ori $a4, $zero, 1
-    ori $a5, $zero, 256
+    lu12i.w $a2, 312
+    ori $a2, $a2, 3296
+    add.d $a2, $a1, $a2
+    ori $a3, $zero, 1
+    ori $a4, $zero, 256
     .p2align 4, , 16
 .LBB12_13: # %.preheader34.i
         # =>This Inner Loop Header: Depth=1
-    bstrpick.d $a6, $a4, 31, 0
-    movgr2fr.d $fa0, $a6
+    bstrpick.d $a5, $a3, 31, 0
+    movgr2fr.d $fa0, $a5
     ffint.d.l $fa0, $fa0
     frecip.d $fa0, $fa0
     xvreplve0.d $xr0, $xr0
-    xvst $xr0, $a3, -1024
-    xvst $xr0, $a3, -992
-    xvst $xr0, $a3, -960
-    xvst $xr0, $a3, -928
-    xvst $xr0, $a3, -896
-    xvst $xr0, $a3, -864
-    xvst $xr0, $a3, -832
-    xvst $xr0, $a3, -800
-    xvst $xr0, $a3, -768
-    xvst $xr0, $a3, -736
-    xvst $xr0, $a3, -704
-    xvst $xr0, $a3, -672
-    xvst $xr0, $a3, -640
-    xvst $xr0, $a3, -608
-    xvst $xr0, $a3, -576
-    xvst $xr0, $a3, -544
-    xvst $xr0, $a3, -512
-    xvst $xr0, $a3, -480
-    xvst $xr0, $a3, -448
-    xvst $xr0, $a3, -416
-    xvst $xr0, $a3, -384
-    xvst $xr0, $a3, -352
-    xvst $xr0, $a3, -320
-    xvst $xr0, $a3, -288
-    xvst $xr0, $a3, -256
-    xvst $xr0, $a3, -224
-    xvst $xr0, $a3, -192
-    xvst $xr0, $a3, -160
-    xvst $xr0, $a3, -128
-    xvst $xr0, $a3, -96
-    xvst $xr0, $a3, -64
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    xvst $xr0, $a3, 32
-    xvst $xr0, $a3, 64
-    xvst $xr0, $a3, 96
-    xvst $xr0, $a3, 128
-    xvst $xr0, $a3, 160
-    xvst $xr0, $a3, 192
-    xvst $xr0, $a3, 224
-    xvst $xr0, $a3, 256
-    xvst $xr0, $a3, 288
-    xvst $xr0, $a3, 320
-    xvst $xr0, $a3, 352
-    xvst $xr0, $a3, 384
-    xvst $xr0, $a3, 416
-    xvst $xr0, $a3, 448
-    xvst $xr0, $a3, 480
-    xvst $xr0, $a3, 512
-    xvst $xr0, $a3, 544
-    xvst $xr0, $a3, 576
-    xvst $xr0, $a3, 608
-    xvst $xr0, $a3, 640
-    xvst $xr0, $a3, 672
-    xvst $xr0, $a3, 704
-    xvst $xr0, $a3, 736
-    xvst $xr0, $a3, 768
-    xvst $xr0, $a3, 800
-    xvst $xr0, $a3, 832
-    xvst $xr0, $a3, 864
-    xvst $xr0, $a3, 896
-    xvst $xr0, $a3, 928
-    xvst $xr0, $a3, 960
-    xvst $xr0, $a3, 992
-    addi.w $a4, $a4, 1
-    addi.d $a5, $a5, -1
-    addi.d $a3, $a3, 2047
-    addi.d $a3, $a3, 1
-    bnez $a5, .LBB12_13
+    xvst $xr0, $a2, -1024
+    xvst $xr0, $a2, -992
+    xvst $xr0, $a2, -960
+    xvst $xr0, $a2, -928
+    xvst $xr0, $a2, -896
+    xvst $xr0, $a2, -864
+    xvst $xr0, $a2, -832
+    xvst $xr0, $a2, -800
+    xvst $xr0, $a2, -768
+    xvst $xr0, $a2, -736
+    xvst $xr0, $a2, -704
+    xvst $xr0, $a2, -672
+    xvst $xr0, $a2, -640
+    xvst $xr0, $a2, -608
+    xvst $xr0, $a2, -576
+    xvst $xr0, $a2, -544
+    xvst $xr0, $a2, -512
+    xvst $xr0, $a2, -480
+    xvst $xr0, $a2, -448
+    xvst $xr0, $a2, -416
+    xvst $xr0, $a2, -384
+    xvst $xr0, $a2, -352
+    xvst $xr0, $a2, -320
+    xvst $xr0, $a2, -288
+    xvst $xr0, $a2, -256
+    xvst $xr0, $a2, -224
+    xvst $xr0, $a2, -192
+    xvst $xr0, $a2, -160
+    xvst $xr0, $a2, -128
+    xvst $xr0, $a2, -96
+    xvst $xr0, $a2, -64
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    xvst $xr0, $a2, 32
+    xvst $xr0, $a2, 64
+    xvst $xr0, $a2, 96
+    xvst $xr0, $a2, 128
+    xvst $xr0, $a2, 160
+    xvst $xr0, $a2, 192
+    xvst $xr0, $a2, 224
+    xvst $xr0, $a2, 256
+    xvst $xr0, $a2, 288
+    xvst $xr0, $a2, 320
+    xvst $xr0, $a2, 352
+    xvst $xr0, $a2, 384
+    xvst $xr0, $a2, 416
+    xvst $xr0, $a2, 448
+    xvst $xr0, $a2, 480
+    xvst $xr0, $a2, 512
+    xvst $xr0, $a2, 544
+    xvst $xr0, $a2, 576
+    xvst $xr0, $a2, 608
+    xvst $xr0, $a2, 640
+    xvst $xr0, $a2, 672
+    xvst $xr0, $a2, 704
+    xvst $xr0, $a2, 736
+    xvst $xr0, $a2, 768
+    xvst $xr0, $a2, 800
+    xvst $xr0, $a2, 832
+    xvst $xr0, $a2, 864
+    xvst $xr0, $a2, 896
+    xvst $xr0, $a2, 928
+    xvst $xr0, $a2, 960
+    xvst $xr0, $a2, 992
+    addi.w $a3, $a3, 1
+    addi.d $a4, $a4, -1
+    addi.d $a2, $a2, 2047
+    addi.d $a2, $a2, 1
+    bnez $a4, .LBB12_13
 # %bb.14: # %.preheader34.i39.preheader
-    lu12i.w $a3, 440
-    ori $a3, $a3, 3392
-    add.d $a3, $a2, $a3
-    ori $a4, $zero, 1
-    ori $a5, $zero, 256
-    .p2align 4, , 16
-.LBB12_15: # %.preheader34.i39
-        # =>This Inner Loop Header: Depth=1
-    bstrpick.d $a6, $a4, 31, 0
-    movgr2fr.d $fa0, $a6
-    ffint.d.l $fa0, $fa0
-    frecip.d $fa0, $fa0
-    xvreplve0.d $xr0, $xr0
-    xvst $xr0, $a3, -1024
-    xvst $xr0, $a3, -992
-    xvst $xr0, $a3, -960
-    xvst $xr0, $a3, -928
-    xvst $xr0, $a3, -896
-    xvst $xr0, $a3, -864
-    xvst $xr0, $a3, -832
-    xvst $xr0, $a3, -800
-    xvst $xr0, $a3, -768
-    xvst $xr0, $a3, -736
-    xvst $xr0, $a3, -704
-    xvst $xr0, $a3, -672
-    xvst $xr0, $a3, -640
-    xvst $xr0, $a3, -608
-    xvst $xr0, $a3, -576
-    xvst $xr0, $a3, -544
-    xvst $xr0, $a3, -512
-    xvst $xr0, $a3, -480
-    xvst $xr0, $a3, -448
-    xvst $xr0, $a3, -416
-    xvst $xr0, $a3, -384
-    xvst $xr0, $a3, -352
-    xvst $xr0, $a3, -320
-    xvst $xr0, $a3, -288
-    xvst $xr0, $a3, -256
-    xvst $xr0, $a3, -224
-    xvst $xr0, $a3, -192
-    xvst $xr0, $a3, -160
-    xvst $xr0, $a3, -128
-    xvst $xr0, $a3, -96
-    xvst $xr0, $a3, -64
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    xvst $xr0, $a3, 32
-    xvst $xr0, $a3, 64
-    xvst $xr0, $a3, 96
-    xvst $xr0, $a3, 128
-    xvst $xr0, $a3, 160
-    xvst $xr0, $a3, 192
-    xvst $xr0, $a3, 224
-    xvst $xr0, $a3, 256
-    xvst $xr0, $a3, 288
-    xvst $xr0, $a3, 320
-    xvst $xr0, $a3, 352
-    xvst $xr0, $a3, 384
-    xvst $xr0, $a3, 416
-    xvst $xr0, $a3, 448
-    xvst $xr0, $a3, 480
-    xvst $xr0, $a3, 512
-    xvst $xr0, $a3, 544
-    xvst $xr0, $a3, 576
-    xvst $xr0, $a3, 608
-    xvst $xr0, $a3, 640
-    xvst $xr0, $a3, 672
-    xvst $xr0, $a3, 704
-    xvst $xr0, $a3, 736
-    xvst $xr0, $a3, 768
-    xvst $xr0, $a3, 800
-    xvst $xr0, $a3, 832
-    xvst $xr0, $a3, 864
-    xvst $xr0, $a3, 896
-    xvst $xr0, $a3, 928
-    xvst $xr0, $a3, 960
-    xvst $xr0, $a3, 992
-    addi.w $a4, $a4, 1
-    addi.d $a5, $a5, -1
-    addi.d $a3, $a3, 2047
-    addi.d $a3, $a3, 1
-    bnez $a5, .LBB12_15
-# %bb.16: # %.preheader34.i47.preheader
-    lu12i.w $a3, 568
-    ori $a3, $a3, 3488
-    add.d $a2, $a2, $a3
+    lu12i.w $a2, 440
+    ori $a2, $a2, 3392
+    add.d $a2, $a1, $a2
     ori $a3, $zero, 1
     ori $a4, $zero, 256
     .p2align 4, , 16
-.LBB12_17: # %.preheader34.i47
+.LBB12_15: # %.preheader34.i39
        # =>This Inner Loop Header: Depth=1
     bstrpick.d $a5, $a3, 31, 0
     movgr2fr.d $fa0, $a5
@@ -13186,45 +12991,129 @@ set: # @set
     addi.d $a4, $a4, -1
     addi.d $a2, $a2, 2047
     addi.d $a2, $a2, 1
-    bnez $a4, .LBB12_17
+    bnez $a4, .LBB12_15
+# %bb.16: # %.preheader34.i47.preheader
+    lu12i.w $a2, 568
+    ori $a2, $a2, 3488
+    add.d $a1, $a1, $a2
+    ori $a2, $zero, 1
+    ori $a3, $zero, 256
+    .p2align 4, , 16
+.LBB12_17: # %.preheader34.i47
+        # =>This Inner Loop Header: Depth=1
+    bstrpick.d $a4, $a2, 31, 0
+    movgr2fr.d $fa0, $a4
+    ffint.d.l $fa0, $fa0
+    frecip.d $fa0, $fa0
+    xvreplve0.d $xr0, $xr0
+    xvst $xr0, $a1, -1024
+    xvst $xr0, $a1, -992
+    xvst $xr0, $a1, -960
+    xvst $xr0, $a1, -928
+    xvst $xr0, $a1, -896
+    xvst $xr0, $a1, -864
+    xvst $xr0, $a1, -832
+    xvst $xr0, $a1, -800
+    xvst $xr0, $a1, -768
+    xvst $xr0, $a1, -736
+    xvst $xr0, $a1, -704
+    xvst $xr0, $a1, -672
+    xvst $xr0, $a1, -640
+    xvst $xr0, $a1, -608
+    xvst $xr0, $a1, -576
+    xvst $xr0, $a1, -544
+    xvst $xr0, $a1, -512
+    xvst $xr0, $a1, -480
+    xvst $xr0, $a1, -448
+    xvst $xr0, $a1, -416
+    xvst $xr0, $a1, -384
+    xvst $xr0, $a1, -352
+    xvst $xr0, $a1, -320
+    xvst $xr0, $a1, -288
+    xvst $xr0, $a1, -256
+    xvst $xr0, $a1, -224
+    xvst $xr0, $a1, -192
+    xvst $xr0, $a1, -160
+    xvst $xr0, $a1, -128
+    xvst $xr0, $a1, -96
+    xvst $xr0, $a1, -64
+    xvst $xr0, $a1, -32
+    xvst $xr0, $a1, 0
+    xvst $xr0, $a1, 32
+    xvst $xr0, $a1, 64
+    xvst $xr0, $a1, 96
+    xvst $xr0, $a1, 128
+    xvst $xr0, $a1, 160
+    xvst $xr0, $a1, 192
+    xvst $xr0, $a1, 224
+    xvst $xr0, $a1, 256
+    xvst $xr0, $a1, 288
+    xvst $xr0, $a1, 320
+    xvst $xr0, $a1, 352
+    xvst $xr0, $a1, 384
+    xvst $xr0, $a1, 416
+    xvst $xr0, $a1, 448
+    xvst $xr0, $a1, 480
+    xvst $xr0, $a1, 512
+    xvst $xr0, $a1, 544
+    xvst $xr0, $a1, 576
+    xvst $xr0, $a1, 608
+    xvst $xr0, $a1, 640
+    xvst $xr0, $a1, 672
+    xvst $xr0, $a1, 704
+    xvst $xr0, $a1, 736
+    xvst $xr0, $a1, 768
+    xvst $xr0, $a1, 800
+    xvst $xr0, $a1, 832
+    xvst $xr0, $a1, 864
+    xvst $xr0, $a1, 896
+    xvst $xr0, $a1, 928
+    xvst $xr0, $a1, 960
+    xvst $xr0, $a1, 992
+    addi.w $a2, $a2, 1
+    addi.d $a3, $a3, -1
+    addi.d $a1, $a1, 2047
+    addi.d $a1, $a1, 1
+    bnez $a3, .LBB12_17
 # %bb.18: # %vector.body119.preheader
-    pcalau12i $a2, %pc_hi20(.LCPI12_0)
-    xvld $xr0, $a2, %pc_lo12(.LCPI12_0)
-    pcalau12i $a2, %pc_hi20(.LCPI12_1)
-    xvld $xr1, $a2, %pc_lo12(.LCPI12_1)
-    pcalau12i $a2, %pc_hi20(indx)
-    addi.d $a2, $a2, %pc_lo12(indx)
-    ori $a1, $a1, 3328
+    pcalau12i $a1, %pc_hi20(.LCPI12_0)
+    xvld $xr0, $a1, %pc_lo12(.LCPI12_0)
+    pcalau12i $a1, %pc_hi20(.LCPI12_1)
+    xvld $xr1, $a1, %pc_lo12(.LCPI12_1)
+    pcalau12i $a1, %pc_hi20(indx)
+    addi.d $a1, $a1, %pc_lo12(indx)
+    ori $a0, $a0, 3328
     xvrepli.w $xr2, 3
     .p2align 4, , 16
 .LBB12_19: # %vector.body119
        # =>This Inner Loop Header: Depth=1
-    xvpickve2gr.d $a3, $xr1, 0
-    xvinsgr2vr.w $xr3, $a3, 0
-    xvpickve2gr.d $a3, $xr1, 1
-    xvinsgr2vr.w $xr3, $a3, 1
-    xvpickve2gr.d $a3, $xr1, 2
-    xvinsgr2vr.w $xr3, $a3, 2
-    xvpickve2gr.d $a3, $xr1, 3
-    xvinsgr2vr.w $xr3, $a3, 3
-    xvpickve2gr.d $a3, $xr0, 0
-    xvinsgr2vr.w $xr3, $a3, 4
-    xvpickve2gr.d $a3, $xr0, 1
-    xvinsgr2vr.w $xr3, $a3, 5
-    xvpickve2gr.d $a3, $xr0, 2
-    xvinsgr2vr.w $xr3, $a3, 6
-    xvpickve2gr.d $a3, $xr0, 3
-    xvinsgr2vr.w $xr3, $a3, 7
+    xvpickve2gr.d $a2, $xr1, 0
+    xvinsgr2vr.w $xr3, $a2, 0
+    xvpickve2gr.d $a2, $xr1, 1
+    xvinsgr2vr.w $xr3, $a2, 1
+    xvpickve2gr.d $a2, $xr1, 2
+    xvinsgr2vr.w $xr3, $a2, 2
+    xvpickve2gr.d $a2, $xr1, 3
+    xvinsgr2vr.w $xr3, $a2, 3
+    xvpickve2gr.d $a2, $xr0, 0
+    xvinsgr2vr.w $xr3, $a2, 4
+    xvpickve2gr.d $a2, $xr0, 1
+    xvinsgr2vr.w $xr3, $a2, 5
+    xvpickve2gr.d $a2, $xr0, 2
+    xvinsgr2vr.w $xr3, $a2, 6
+    xvpickve2gr.d $a2, $xr0, 3
+    xvinsgr2vr.w $xr3, $a2, 7
     xvaddi.wu $xr3, $xr3, 1
     xvand.v $xr3, $xr3, $xr2
     xvaddi.wu $xr3, $xr3, 1
-    xvst $xr3, $a2, 0
+    xvst $xr3, $a1, 0
     xvaddi.du $xr1, $xr1, 8
     xvaddi.du $xr0, $xr0, 8
-    addi.d $a1, $a1, -8
-    addi.d $a2, $a2, 32
-    bnez $a1, .LBB12_19
+    addi.d $a0, $a0, -8
+    addi.d $a1, $a1, 32
+    bnez $a0, .LBB12_19
 # %bb.20: # %middle.block122
+    lu52i.d $a0, $zero, 1023
     st.d $a0, $s0, 0
     lu52i.d $a0, $zero, 1024
     st.d $a0, $fp, 0
diff --git a/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s
index 262eebbf..e8bb69fe 100644
--- a/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s
+++ b/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s
@@ -1900,8 +1900,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_151: # %vector.body5756
        # =>This Inner Loop Header: Depth=1
@@ -2071,8 +2070,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_161: # %vector.body5742
        # =>This Inner Loop Header: Depth=1
@@ -2128,8 +2126,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_165: # %vector.body5728
        # =>This Inner Loop Header: Depth=1
@@ -2289,8 +2286,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_173: # %vector.body5688
        # =>This Inner Loop Header: Depth=1
@@ -2437,8 +2433,7 @@ init: # @init
     addi.d $a0, $a0, %pc_lo12(global_data+32)
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu12i.w $a2, 260096
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_181: # %vector.body5682
        # =>This Inner Loop Header: Depth=1
@@ -2454,8 +2449,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_183: # %vector.body5670
        # =>This Inner Loop Header: Depth=1
@@ -2518,8 +2512,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
     .p2align 4, , 16
 .LBB5_187: # %.preheader.i1144
        # =>This Inner Loop Header: Depth=1
@@ -2615,8 +2608,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_191: # %vector.body5642
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -2670,8 +2662,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_195: # %vector.body5628
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -2732,8 +2723,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_199: # %vector.body5604
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -2817,8 +2807,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_207: # %vector.body5580
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -2903,8 +2892,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_215: # %.preheader.i1220
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -2947,8 +2935,7 @@ init: # @init
     ori $a1, $a1, 1808
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 258048
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -3265
 .LBB5_217: # %.preheader.i1227
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -2991,8 +2978,7 @@ init: # @init
     ori $a1, $a1, 1888
     add.d $a0, $a0, $a1
     ori $a1, $zero, 256
-    lu12i.w $a2, 262144
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -3264
 .LBB5_219: # %.preheader.i1234
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -512
@@ -3038,8 +3024,7 @@ init: # @init
     ori $a1, $a1, 1808
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_221: # %.preheader.i1241
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -3166,8 +3151,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_227: # %vector.body5518
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -3264,8 +3248,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 262144
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -3264
 .LBB5_235: # %vector.body5500
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -3278,8 +3261,7 @@ init: # @init
     ori $a1, $a1, 2128
     add.d $a1, $fp, $a1
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_237: # %vector.body5506
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -3306,8 +3288,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_241: # %vector.body5486
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -3362,8 +3343,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_245: # %.preheader.i1309
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -3458,8 +3438,7 @@ init: # @init
     addi.d $a0, $a0, %pc_lo12(array+32)
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu12i.w $a2, 260096
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -1424
 .LBB5_251: # %vector.body5454
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -3522,8 +3501,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_255: # %vector.body5440
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -3577,8 +3555,7 @@ init: # @init
     addi.d $a0, $fp, 32
     lu12i.w $s0, 7
     ori $a1, $s0, 3328
-    lu12i.w $a2, 260096
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -1424
 .LBB5_259: # %vector.body5416
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -3672,35 +3649,35 @@ init: # @init
 .LBB5_266: # %vector.body5379.preheader
     pcalau12i $a0, %pc_hi20(global_data)
     addi.d $a0, $a0, %pc_lo12(global_data)
-    addi.d $a3, $a0, 32
+    addi.d $a2, $a0, 32
     lu12i.w $a1, 7
-    ori $a4, $a1, 3328
-    lu12i.w $a2, 260096
-    xvreplgr2vr.w $xr0, $a2
+    ori $a3, $a1, 3328
+    xvldi $xr0, -1424
 .LBB5_267: # %vector.body5379
        # =>This Inner Loop Header: Depth=1
-    xvst $xr0, $a3, -32
-    xvst $xr0, $a3, 0
-    addi.d $a4, $a4, -16
-    addi.d $a3, $a3, 64
-    bnez $a4, .LBB5_267
+    xvst $xr0, $a2, -32
+    xvst $xr0, $a2, 0
+    addi.d $a3, $a3, -16
+    addi.d $a2, $a2, 64
+    bnez $a3, .LBB5_267
 # %bb.268: # %vector.body5385.preheader
-    lu12i.w $a3, 31
-    ori $a4, $a3, 1048
-    add.d $a5, $a0, $a4
-    lu12i.w $a4, 3
-    ori $a6, $a4, 3712
+    lu12i.w $a2, 31
+    ori $a3, $a2, 1048
+    add.d $a4, $a0, $a3
+    lu12i.w $a3, 3
+    ori $a5, $a3, 3712
+    lu12i.w $a6, 260096
 .LBB5_269: # %vector.body5385
        # =>This Inner Loop Header: Depth=1
-    st.w $a2, $a5, -8
-    st.w $a2, $a5, 0
-    addi.d $a6, $a6, -2
-    addi.d $a5, $a5, 16
-    bnez $a6, .LBB5_269
+    st.w $a6, $a4, -8
+    st.w $a6, $a4, 0
+    addi.d $a5, $a5, -2
+    addi.d $a4, $a4, 16
+    bnez $a5, .LBB5_269
 # %bb.270: # %vector.body5391.preheader
-    ori $a2, $a3, 1052
+    ori $a2, $a2, 1052
     add.d $a2, $a0, $a2
-    ori $a3, $a4, 3712
+    ori $a3, $a3, 3712
     lu12i.w $a4, -264192
     lu32i.d $a4, 0
 .LBB5_271: # %vector.body5391
@@ -3779,8 +3756,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_279: # %vector.body5361
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -3845,8 +3821,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_285: # %vector.body5347
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -3900,8 +3875,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_289: # %vector.body5333
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -3955,8 +3929,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_293: # %vector.body5319
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4010,8 +3983,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_297: # %vector.body5305
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4065,8 +4037,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_301: # %vector.body5291
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4120,8 +4091,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_305: # %vector.body5273
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4193,8 +4163,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_311: # %vector.body5249
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -4309,8 +4278,7 @@ init: # @init
     ori $a2, $a2, 1072
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_321: # %vector.body5231
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4362,8 +4330,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_327: # %vector.body5201
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -4460,8 +4427,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_335: # %vector.body5189
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -4489,8 +4455,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_339: # %.preheader.i1541
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -4585,8 +4550,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_343: # %.preheader.i1556
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -4795,8 +4759,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_354: # %.preheader.i1594
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -4941,8 +4904,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_360: # %vector.body5091
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -5098,8 +5060,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_370: # %vector.body5067
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -5210,8 +5171,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_386: # %vector.body5019
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5308,8 +5268,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_394: # %vector.body5001
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5358,8 +5317,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_400: # %vector.body4977
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5456,8 +5414,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_408: # %vector.body4965
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5484,8 +5441,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_412: # %vector.body4941
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -5559,8 +5515,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu12i.w $a2, 260096
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -1424
 .LBB5_420: # %vector.body4935
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -5582,8 +5537,7 @@ init: # @init
     add.d $a0, $fp, $a0
     lu12i.w $a1, 7
     ori $a1, $a1, 3328
-    lu12i.w $a2, 260096
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -1424
 .LBB5_422: # %vector.body4929
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -5598,8 +5552,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_424: # %vector.body4911
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5612,8 +5565,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 262144
-    xvreplgr2vr.w $xr1, $a3
+    xvldi $xr1, -3264
 .LBB5_426: # %.preheader.i1789
        # =>This Inner Loop Header: Depth=1
     xvst $xr1, $a1, -512
@@ -5700,8 +5652,7 @@ init: # @init
     addi.d $a1, $a0, 32
     lu12i.w $a2, 7
     ori $a2, $a2, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_430: # %vector.body4893
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -5714,8 +5665,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 262144
-    xvreplgr2vr.w $xr1, $a3
+    xvldi $xr1, -3264
 .LBB5_432: # %.preheader.i1807
        # =>This Inner Loop Header: Depth=1
     xvst $xr1, $a1, -512
@@ -5944,8 +5894,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_444: # %vector.body4839
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6048,8 +5997,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_452: # %vector.body4821
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6114,8 +6062,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_458: # %vector.body4791
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6190,8 +6137,7 @@ init: # @init
     ori $a2, $a2, 160
     add.d $a0, $a0, $a2
     ori $a1, $a1, 3328
-    lu12i.w $a2, 262144
-    xvreplgr2vr.w $xr0, $a2
+    xvldi $xr0, -3264
 .LBB5_466: # %vector.body4815
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a0, -32
@@ -6206,8 +6152,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_468: # %vector.body4761
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6293,8 +6238,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_478: # %vector.body4737
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -32
@@ -6372,8 +6316,7 @@ init: # @init
     ori $a1, $a1, 1728
     add.d $a1, $a0, $a1
     ori $a2, $zero, 256
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
 .LBB5_486: # %.preheader.i1949
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a1, -512
@@ -6505,8 +6448,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_492: # %vector.body4695
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6596,8 +6538,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_500: # %vector.body4659
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6622,8 +6563,7 @@ init: # @init
     ori $a2, $a2, 3632
     add.d $a2, $a0, $a2
     ori $a3, $a1, 3328
-    lu12i.w $a4, -264192
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1296
 .LBB5_504: # %vector.body4671
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6713,8 +6653,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, -264192
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1296
 .LBB5_512: # %vector.body4623
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6726,8 +6665,7 @@ init: # @init
     addu16i.d $a2, $a0, 1
     addi.d $a2, $a2, -1504
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_514: # %vector.body4629
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6829,8 +6767,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, -264192
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1296
 .LBB5_524: # %vector.body4587
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6842,8 +6779,7 @@ init: # @init
     addu16i.d $a2, $a0, 1
     addi.d $a2, $a2, -1504
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_526: # %vector.body4593
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -6945,8 +6881,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_536: # %vector.body4557
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7048,8 +6983,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_546: # %vector.body4539
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7114,8 +7048,7 @@ init: # @init
     addi.d $a2, $a0, 32
     lu12i.w $a1, 7
     ori $a3, $a1, 3328
-    lu12i.w $a4, 260096
-    xvreplgr2vr.w $xr0, $a4
+    xvldi $xr0, -1424
 .LBB5_552: # %vector.body4521
        # =>This Inner Loop Header: Depth=1
     xvst $xr0, $a2, -32
@@ -7187,8 +7120,7 @@ init: # @init
     add.d $a1, $fp, $a0
     lu12i.w $a0, 7
     ori $a2, $a0, 3328
-    lu12i.w $a3, 260096
-    xvreplgr2vr.w $xr0, $a3
+    xvldi $xr0, -1424
%vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 
0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill 
.LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 
lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10961,8 +10853,7 @@ s421: # @s421 addi.d $a0, $a0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB6_1: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -11194,8 +11085,7 @@ s1421: # @s1421 addi.d $a0, $a0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB7_1: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -11598,8 +11488,7 @@ s423: # @s423 addi.d $a0, $s7, 288 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB9_1: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -12119,181 +12008,129 @@ set: # @set bnez $a2, .LBB12_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB12_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB12_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB12_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB12_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB12_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB12_5 # 
%bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB12_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB12_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB12_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB12_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB12_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB12_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB12_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB12_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB12_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB12_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB12_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, 
$a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB12_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB12_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB12_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB12_17: # %.preheader34.i47 +.LBB12_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12336,45 +12173,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB12_17 + bnez $a4, .LBB12_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB12_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB12_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI12_0) - xvld $xr0, $a2, %pc_lo12(.LCPI12_0) - pcalau12i $a2, %pc_hi20(.LCPI12_1) - xvld $xr1, $a2, %pc_lo12(.LCPI12_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI12_0) + xvld $xr0, $a1, %pc_lo12(.LCPI12_0) + pcalau12i $a1, %pc_hi20(.LCPI12_1) + xvld $xr1, $a1, %pc_lo12(.LCPI12_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB12_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, 
$xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB12_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB12_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s index 1e29fa90..1070e073 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-dbl/CMakeFiles/Expansion-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, 
$a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 
ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 
.LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: 
Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst 
$xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ 
-11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12776,8 +12668,7 @@ s254: # @s254 lu12i.w $a1, 62 ori $s7, $a1, 2080 ori $s0, $a1, 2112 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr5, $a2 + xvldi $xr5, -928 ori $s1, $a1, 2048 add.d $a1, $fp, $s7 st.d $a1, $sp, 24 # 8-byte Folded Spill @@ -13875,245 +13766,161 @@ set: # @set bnez $a2, .LBB19_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB19_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, 
$a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB19_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB19_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + 
xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB19_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB19_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB19_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB19_17: # %.preheader34.i47 +.LBB19_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -14188,45 +13995,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB19_17 + bnez $a4, .LBB19_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB19_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + 
movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB19_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI19_0) - xvld $xr0, $a2, %pc_lo12(.LCPI19_0) - pcalau12i $a2, %pc_hi20(.LCPI19_1) - xvld $xr1, $a2, %pc_lo12(.LCPI19_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI19_0) + xvld $xr0, $a1, %pc_lo12(.LCPI19_0) + pcalau12i $a1, %pc_hi20(.LCPI19_1) + xvld $xr1, $a1, %pc_lo12(.LCPI19_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB19_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB19_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB19_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d 
$a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s index 4a7d58e0..5e52d42b 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - 
bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # 
@init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi 
$xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ 
-7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, 
.LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # 
@init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 
+ xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12010,8 +11902,7 @@ s254: # @s254 st.d $a1, $sp, 48 # 8-byte Folded Spill lu12i.w $a1, 31 ori $s7, $a1, 1040 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr3, $a2 + xvldi $xr3, -3265 ori $s0, $a1, 1024 add.d $a1, $fp, $s7 st.d $a1, $sp, 8 # 8-byte Folded Spill @@ -13173,181 +13064,129 @@ set: # @set bnez $a2, .LBB19_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB19_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, 
$a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB19_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB19_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst 
$xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB19_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB19_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB19_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB19_17: # %.preheader34.i47 +.LBB19_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13390,45 +13229,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB19_17 + bnez $a4, .LBB19_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB19_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB19_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI19_0) - xvld $xr0, $a2, %pc_lo12(.LCPI19_0) - pcalau12i $a2, %pc_hi20(.LCPI19_1) - xvld $xr1, $a2, %pc_lo12(.LCPI19_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI19_0) + xvld $xr0, $a1, %pc_lo12(.LCPI19_0) + pcalau12i $a1, %pc_hi20(.LCPI19_1) + xvld $xr1, $a1, %pc_lo12(.LCPI19_1) + 
pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB19_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB19_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB19_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-dbl/CMakeFiles/GlobalDataFlow-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-dbl/CMakeFiles/GlobalDataFlow-dbl.dir/tsc.s index 90a15631..a5db2050 100644 --- a/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-dbl/CMakeFiles/GlobalDataFlow-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-dbl/CMakeFiles/GlobalDataFlow-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 
+ xvldi $xr0, -912
.LBB5_251: # %vector.body5850
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -3985,8 +3964,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_255: # %vector.body5835
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4024,8 +4002,7 @@ init: # @init
 addi.d $a0, $fp, 32
 lu12i.w $s0, 7
 ori $a1, $s0, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_259: # %vector.body5802
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -4114,35 +4091,35 @@ init: # @init
.LBB5_266: # %vector.body5759.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
+ addi.d $a2, $a0, 32
 lu12i.w $a1, 7
- ori $a4, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ ori $a3, $a1, 3328
+ xvldi $xr0, -912
.LBB5_267: # %vector.body5759
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_267
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_267
# %bb.268: # %vector.body5765.preheader
- lu12i.w $a3, 62
- ori $a4, $a3, 2096
- add.d $a5, $a0, $a4
- lu12i.w $a4, 3
- ori $a6, $a4, 3712
+ lu12i.w $a2, 62
+ ori $a3, $a2, 2096
+ add.d $a4, $a0, $a3
+ lu12i.w $a3, 3
+ ori $a5, $a3, 3712
+ lu52i.d $a6, $zero, 1023
.LBB5_269: # %vector.body5765
 # =>This Inner Loop Header: Depth=1
- st.d $a2, $a5, -16
- st.d $a2, $a5, 0
- addi.d $a6, $a6, -2
- addi.d $a5, $a5, 32
- bnez $a6, .LBB5_269
+ st.d $a6, $a4, -16
+ st.d $a6, $a4, 0
+ addi.d $a5, $a5, -2
+ addi.d $a4, $a4, 32
+ bnez $a5, .LBB5_269
# %bb.270: # %vector.body5771.preheader
- ori $a2, $a3, 2104
+ ori $a2, $a2, 2104
 add.d $a2, $a0, $a2
- ori $a3, $a4, 3712
+ ori $a3, $a3, 3712
 lu52i.d $a4, $zero, -1025
.LBB5_271: # %vector.body5771
 # =>This Inner Loop Header: Depth=1
@@ -4217,8 +4194,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_279: # %vector.body5735
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4280,8 +4256,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_285: # %vector.body5720
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4319,8 +4294,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_289: # %vector.body5705
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4358,8 +4332,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_293: # %vector.body5690
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4397,8 +4370,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_297: # %vector.body5675
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4436,8 +4408,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_301: # %vector.body5660
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4475,8 +4446,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_305: # %vector.body5636
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4545,8 +4515,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_311: # %vector.body5603
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -4655,8 +4624,7 @@ init: # @init
 ori $a3, $a3, 2112
 add.d $a3, $a1, $a3
 ori $a4, $a2, 3328
- lu52i.d $a5, $zero, 1023
- xvreplgr2vr.d $xr0, $a5
+ xvldi $xr0, -912
.LBB5_321: # %vector.body5582
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a3, -32
@@ -4706,8 +4674,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_327: # %vector.body5540
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4799,8 +4766,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_335: # %vector.body5528
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -4828,8 +4794,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_339: # %.preheader.i1541
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -1024
@@ -4989,8 +4954,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_343: # %.preheader.i1556
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -1024
@@ -5328,8 +5292,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_354: # %.preheader.i1594
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -1024
@@ -5571,8 +5534,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_360: # %vector.body5424
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -5790,8 +5752,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_370: # %vector.body5400
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -5904,8 +5865,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_386: # %vector.body5343
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5997,8 +5957,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_394: # %vector.body5325
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -6049,8 +6008,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_400: # %vector.body5292
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -6142,8 +6100,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_408: # %vector.body5280
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -6170,8 +6127,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_412: # %vector.body5253
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6246,8 +6202,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_420: # %vector.body5247
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -6269,8 +6224,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_422: # %vector.body5241
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -6285,8 +6239,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_424: # %vector.body5223
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -6299,8 +6252,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1024
- xvreplgr2vr.d $xr1, $a3
+ xvldi $xr1, -1024
.LBB5_426: # %.preheader.i1789
 # =>This Inner Loop Header: Depth=1
 xvst $xr1, $a1, -1024
@@ -6453,8 +6405,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_430: # %vector.body5205
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -6467,8 +6418,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1024
- xvreplgr2vr.d $xr1, $a3
+ xvldi $xr1, -1024
.LBB5_432: # %.preheader.i1807
 # =>This Inner Loop Header: Depth=1
 xvst $xr1, $a1, -1024
@@ -6790,8 +6740,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_444: # %vector.body5140
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6863,8 +6812,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_452: # %vector.body5116
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6926,8 +6874,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_458: # %vector.body5080
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6999,8 +6946,7 @@ init: # @init
 ori $a2, $a2, 192
 add.d $a0, $a0, $a2
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1024
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -1024
.LBB5_466: # %vector.body5110
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -7015,8 +6961,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_468: # %vector.body5047
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7103,8 +7048,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_478: # %vector.body5017
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -7179,8 +7123,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_486: # %.preheader.i1949
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -1024
@@ -7413,8 +7356,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_492: # %vector.body4966
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7499,8 +7441,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_500: # %vector.body4921
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7525,8 +7466,7 @@ init: # @init
 ori $a2, $a2, 3136
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, -1025
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -784
.LBB5_504: # %vector.body4933
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7611,8 +7551,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, -1025
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -784
.LBB5_512: # %vector.body4876
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7625,8 +7564,7 @@ init: # @init
 ori $a2, $a2, 1056
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_514: # %vector.body4882
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7723,8 +7661,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, -1025
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -784
.LBB5_524: # %vector.body4831
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7737,8 +7674,7 @@ init: # @init
 ori $a2, $a2, 1056
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_526: # %vector.body4837
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7835,8 +7771,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_536: # %vector.body4792
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7933,8 +7868,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_546: # %vector.body4768
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7996,8 +7930,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_552: # %vector.body4744
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -8066,8 +7999,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_558: # %vector.body4732
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -8101,8 +8033,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_562: # %vector.body4726
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -8124,8 +8055,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_564: # %vector.body4720
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -8166,8 +8096,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_568: # %.preheader.i2154
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -1024
@@ -8870,8 +8799,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_608: # %vector.body4552
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -8891,8 +8819,7 @@ init: # @init
 addi.d $a0, $s0, 32
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_611: # %vector.body4546
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -8923,8 +8850,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_614: # %vector.body4507
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9245,8 +9171,7 @@ init: # @init
 ori $a1, $a1, 3392
 add.d $a0, $a0, $a1
 ori $a1, $zero, 256
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_639: # %.preheader.i2360
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -1024
@@ -9321,30 +9246,29 @@ init: # @init
.LBB5_640: # %vector.body4436.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
- lu12i.w $a2, 7
- ori $a4, $a2, 3328
- lu52i.d $a1, $zero, 1023
- xvreplgr2vr.d $xr0, $a1
+ addi.d $a2, $a0, 32
+ lu12i.w $a1, 7
+ ori $a3, $a1, 3328
+ xvldi $xr0, -912
.LBB5_641: # %vector.body4436
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_641
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_641
# %bb.642: # %vector.body4442.preheader
- lu12i.w $a3, 62
- ori $a3, $a3, 2112
- add.d $a3, $a0, $a3
- ori $a2, $a2, 3328
+ lu12i.w $a2, 62
+ ori $a2, $a2, 2112
+ add.d $a2, $a0, $a2
+ ori $a1, $a1, 3328
.LBB5_643: # %vector.body4442
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a2, $a2, -8
- addi.d $a3, $a3, 64
- bnez $a2, .LBB5_643
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a1, $a1, -8
+ addi.d $a2, $a2, 64
+ bnez $a1, .LBB5_643
 b .LBB5_652
.LBB5_644: # %vector.body4418.preheader
 pcalau12i $a0, %pc_hi20(.LCPI5_0)
@@ -9398,34 +9322,34 @@ init: # @init
.LBB5_648: # %vector.body4406.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
- lu12i.w $a2, 7
- ori $a4, $a2, 3328
- lu52i.d $a1, $zero, 1023
- xvreplgr2vr.d $xr0, $a1
+ addi.d $a2, $a0, 32
+ lu12i.w $a1, 7
+ ori $a3, $a1, 3328
+ xvldi $xr0, -912
.LBB5_649: # %vector.body4406
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_649
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_649
# %bb.650: # %vector.body4412.preheader
- lu12i.w $a3, 62
- ori $a3, $a3, 2112
- add.d $a3, $a0, $a3
- ori $a2, $a2, 3328
+ lu12i.w $a2, 62
+ ori $a2, $a2, 2112
+ add.d $a2, $a0, $a2
+ ori $a1, $a1, 3328
.LBB5_651: # %vector.body4412
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a2, $a2, -8
- addi.d $a3, $a3, 64
- bnez $a2, .LBB5_651
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a1, $a1, -8
+ addi.d $a2, $a2, 64
+ bnez $a1, .LBB5_651
.LBB5_652: # %set1d.exit2374
- lu12i.w $a2, 125
- ori $a2, $a2, 64
- stx.d $a1, $a0, $a2
+ lu12i.w $a1, 125
+ ori $a1, $a1, 64
+ lu52i.d $a2, $zero, 1023
+ stx.d $a2, $a0, $a1
 b .LBB5_573
.LBB5_653: # %vector.body4382.preheader
 pcalau12i $a0, %pc_hi20(global_data)
@@ -9433,8 +9357,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_654: # %vector.body4382
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9496,8 +9419,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_660: # %vector.body4358
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9566,8 +9488,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_666: # %vector.body4328
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -9642,8 +9563,7 @@ init: # @init
 ori $a1, $a1, 3296
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_674: # %.preheader.i2443
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -1024
@@ -9885,8 +9805,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a3, $a2, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_680: # %vector.body4282
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -9975,8 +9894,7 @@ init: # @init
 ori $a2, $a2, 2112
 add.d $a0, $a0, $a2
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
.LBB5_689: # %vector.body4276
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -9990,8 +9908,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(array+32)
 lu12i.w $a1, 7
 ori $a2, $a1, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_691: # %vector.body4252
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -10058,8 +9975,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(array+32)
 lu12i.w $a1, 7
 ori $a2, $a1, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_697: # %vector.body4228
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -10096,8 +10012,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_701: # %vector.body4213
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10135,8 +10050,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_705: # %vector.body4198
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10174,8 +10088,7 @@ init: # @init
 addi.d $a0, $fp, 32
 lu12i.w $s0, 7
 ori $a1, $s0, 3328
- lu52i.d $a2, $zero, 1023
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -912
 xvst $xr0, $sp, 16 # 32-byte Folded Spill
.LBB5_709: # %vector.body4162
 # =>This Inner Loop Header: Depth=1
@@ -10237,8 +10150,7 @@ init: # @init
 ori $a0, $a0, 2176
 add.d $a0, $fp, $a0
 ori $a2, $s0, 3328
- lu52i.d $a3, $zero, -1025
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -784
.LBB5_715: # %vector.body4186
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -10272,8 +10184,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_719: # %vector.body4120
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10381,8 +10292,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_729: # %vector.body4096
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10499,23 +10409,22 @@ init: # @init
 pcaddu18i $ra, %call36(memset)
 jirl $ra, $ra, 0
 ori $a0, $s0, 2112
- add.d $a1, $fp, $a0
- lu12i.w $a0, 7
- ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ add.d $a0, $fp, $a0
+ lu12i.w $a1, 7
+ ori $a2, $a1, 3328
+ xvldi $xr0, -912
.LBB5_739: # %vector.body4066
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a1, -32
- xvst $xr0, $a1, 0
+ xvst $xr0, $a0, -32
+ xvst $xr0, $a0, 0
 addi.d $a2, $a2, -8
- addi.d $a1, $a1, 64
+ addi.d $a0, $a0, 64
 bnez $a2, .LBB5_739
# %bb.740: # %vector.body4072.preheader
- lu12i.w $a1, 125
- ori $a1, $a1, 96
- add.d $a1, $fp, $a1
- ori $a0, $a0, 3328
+ lu12i.w $a0, 125
+ ori $a0, $a0, 96
+ add.d $a0, $fp, $a0
+ ori $a1, $a1, 3328
 lu12i.w $a2, -390306
 ori $a2, $a2, 3469
 lu32i.d $a2, 50935
@@ -10523,11 +10432,11 @@ init: # @init
 xvreplgr2vr.d $xr0, $a2
.LBB5_741: # %vector.body4072
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a1, -32
- xvst $xr0, $a1, 0
- addi.d $a0, $a0, -8
- addi.d $a1, $a1, 64
- bnez $a0, .LBB5_741
+ xvst $xr0, $a0, -32
+ xvst $xr0, $a0, 0
+ addi.d $a1, $a1, -8
+ addi.d $a0, $a0, 64
+ bnez $a1, .LBB5_741
 b .LBB5_573
.LBB5_742: # %.preheader.i2620.preheader
 pcalau12i $a0, %pc_hi20(global_data)
@@ -10567,8 +10476,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_745: # %vector.body4021
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10654,8 +10562,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_755: # %vector.body3988
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10740,8 +10647,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_763: # %vector.body3964
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10810,8 +10716,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_769: # %vector.body3940
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -10873,8 +10778,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_775: # %vector.body3925
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10920,8 +10824,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a2, $a1, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_779: # %vector.body3910
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -10966,8 +10869,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_783: # %vector.body3886
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -11193,8 +11095,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
.LBB5_797: # %vector.body3827
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -11256,8 +11157,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_803: # %vector.body3803
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11479,8 +11379,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_819: # %vector.body3746
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11507,8 +11406,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_823: # %vector.body3722
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11570,8 +11468,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_829: # %vector.body3707
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11632,8 +11529,7 @@ init: # @init
 ori $a2, $a2, 2112
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_835: # %vector.body3695
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11646,8 +11542,7 @@ init: # @init
 ori $a2, $a2, 96
 add.d $a0, $a0, $a2
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, -1025
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -784
.LBB5_837: # %vector.body3701
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -11662,8 +11557,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -912
.LBB5_839: # %vector.body3668
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11676,8 +11570,7 @@ init: # @init
 ori $a2, $a2, 2112
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu52i.d $a4, $zero, 1024
- xvreplgr2vr.d $xr0, $a4
+ xvldi $xr0, -1024
.LBB5_841: # %vector.body3674
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -11690,8 +11583,7 @@ init: # @init
 ori $a2, $a2, 96
 add.d $a0, $a0, $a2
 ori $a1, $a1, 3328
- lu52i.d $a2, $zero, 1022
- xvreplgr2vr.d $xr0, $a2
+ xvldi $xr0, -928
.LBB5_843: # %vector.body3680
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -13658,245 +13550,161 @@ set: # @set
 bnez $a2, .LBB19_1
# %bb.2: # %vector.body67.preheader
 pcalau12i $a0, %pc_hi20(global_data)
- addi.d $a2, $a0, %pc_lo12(global_data)
- addi.d $a3, $a2, 32
- lu12i.w $a1, 7
- ori $a4, $a1, 3328
- lu52i.d $a0, $zero, 1023
- xvreplgr2vr.d $xr0, $a0
+ addi.d $a1, $a0, %pc_lo12(global_data)
+ addi.d $a2, $a1, 32
+ lu12i.w $a0, 7
+ ori $a3, $a0, 3328
+ xvldi $xr0, -912
 .p2align 4, , 16
.LBB19_3: # %vector.body67
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_3
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_3
# %bb.4: # %vector.body73.preheader
- ori $a3, $s2, 2112
- add.d $a3, $a2, $a3
- ori $a4, $a1, 3328
+ ori $a2, $s2, 2112
+ add.d $a2, $a1, $a2
+ ori $a3, $a0, 3328
 .p2align 4, , 16
.LBB19_5: # %vector.body73
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_5
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_5
# %bb.6: # %vector.body79.preheader
- lu12i.w $a3, 125
- ori $a3, $a3, 96
- add.d $a3, $a2, $a3
- ori $a4, $a1, 3328
+ lu12i.w $a2, 125
+ ori $a2, $a2, 96
+ add.d $a2, $a1, $a2
+ ori $a3, $a0, 3328
 .p2align 4, , 16
.LBB19_7: # %vector.body79
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_7
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_7
# %bb.8: # %vector.body85.preheader
- lu12i.w $a3, 187
- ori $a3, $a3, 2176
- add.d $a3, $a2, $a3
- ori $a4, $a1, 3328
+ lu12i.w $a2, 187
+ ori $a2, $a2, 2176
+ add.d $a2, $a1, $a2
+ ori $a3, $a0, 3328
 .p2align 4, , 16
.LBB19_9: # %vector.body85
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_9
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_9
# %bb.10: # %vector.body91.preheader
- lu12i.w $a3, 250
- ori $a3, $a3, 192
- add.d $a3, $a2, $a3
- ori $a4, $a1, 3328
+ lu12i.w $a2, 250
+ ori $a2, $a2, 192
+ add.d $a2, $a1, $a2
+ ori $a3, $a0, 3328
 .p2align 4, , 16
.LBB19_11: # %vector.body91
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -8
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_11
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -8
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_11
# %bb.12: # %.preheader34.i.preheader
- lu12i.w $a3, 312
- ori $a3, $a3, 3296
- add.d $a3, $a2, $a3
- ori $a4, $zero, 1
- ori $a5, $zero, 256
+ lu12i.w $a2, 312
+ ori $a2, $a2, 3296
+ add.d $a2, $a1, $a2
+ ori $a3, $zero, 1
+ ori $a4, $zero, 256
 .p2align 4, , 16
.LBB19_13: # %.preheader34.i
 # =>This Inner Loop Header: Depth=1
- bstrpick.d $a6, $a4, 31, 0
- movgr2fr.d $fa0, $a6
+ bstrpick.d $a5, $a3, 31, 0
+ movgr2fr.d $fa0, $a5
 ffint.d.l $fa0, $fa0
 frecip.d $fa0, $fa0
 xvreplve0.d $xr0, $xr0
- xvst $xr0, $a3, -1024
- xvst $xr0, $a3, -992
- xvst $xr0, $a3, -960
- xvst $xr0, $a3, -928
- xvst $xr0, $a3, -896
- xvst $xr0, $a3, -864
- xvst $xr0, $a3, -832
- xvst $xr0, $a3, -800
- xvst $xr0, $a3, -768
- xvst $xr0, $a3, -736
- xvst $xr0, $a3, -704
- xvst $xr0, $a3, -672
- xvst $xr0, $a3, -640
- xvst $xr0, $a3, -608
- xvst $xr0, $a3, -576
- xvst $xr0, $a3, -544
- xvst $xr0, $a3, -512
- xvst $xr0, $a3, -480
- xvst $xr0, $a3, -448
- xvst $xr0, $a3, -416
- xvst $xr0, $a3, -384
- xvst $xr0, $a3, -352
- xvst $xr0, $a3, -320
- xvst $xr0, $a3, -288
- xvst $xr0, $a3, -256
- xvst $xr0, $a3, -224
- xvst $xr0, $a3, -192
- xvst $xr0, $a3, -160
- xvst $xr0, $a3, -128
- xvst $xr0, $a3, -96
- xvst $xr0, $a3, -64
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- xvst $xr0, $a3, 32
- xvst $xr0, $a3, 64
- xvst $xr0, $a3, 96
- xvst $xr0, $a3, 128
- xvst $xr0, $a3, 160
- xvst $xr0, $a3, 192
- xvst $xr0, $a3, 224
- xvst $xr0, $a3, 256
- xvst $xr0, $a3, 288
- xvst $xr0, $a3, 320
- xvst $xr0, $a3, 352
- xvst $xr0, $a3, 384
- xvst $xr0, $a3, 416
- xvst $xr0, $a3, 448
- xvst $xr0, $a3, 480
- xvst $xr0, $a3, 512
- xvst $xr0, $a3, 544
- xvst $xr0, $a3, 576
- xvst $xr0, $a3, 608
- xvst $xr0, $a3, 640
- xvst $xr0, $a3, 672
- xvst $xr0, $a3, 704
- xvst $xr0, $a3, 736
- xvst $xr0, $a3, 768
- xvst $xr0, $a3, 800
- xvst $xr0, $a3, 832
- xvst $xr0, $a3, 864
- xvst $xr0, $a3, 896
- xvst $xr0, $a3, 928
- xvst $xr0, $a3, 960
- xvst $xr0, $a3, 992
- addi.w $a4, $a4, 1
- addi.d $a5, $a5, -1
- addi.d $a3, $a3, 2047
- addi.d $a3, $a3, 1
- bnez $a5, .LBB19_13
+ xvst $xr0, $a2, -1024
+ xvst $xr0, $a2, -992
+ xvst $xr0, $a2, -960
+ xvst $xr0, $a2, -928
+ xvst $xr0, $a2, -896
+ xvst $xr0, $a2, -864
+ xvst $xr0, $a2, -832
+ xvst $xr0, $a2, -800
+ xvst $xr0, $a2, -768
+ xvst $xr0, $a2, -736
+ xvst $xr0, $a2, -704
+ xvst $xr0, $a2, -672
+ xvst $xr0, $a2, -640
+ xvst $xr0, $a2, -608
+ xvst $xr0, $a2, -576
+ xvst $xr0, $a2, -544
+ xvst $xr0, $a2, -512
+ xvst $xr0, $a2, -480
+ xvst $xr0, $a2, -448
+ xvst $xr0, $a2, -416
+ xvst $xr0, $a2, -384
+ xvst $xr0, $a2, -352
+ xvst $xr0, $a2, -320
+ xvst $xr0, $a2, -288
+ xvst $xr0, $a2, -256
+ xvst $xr0, $a2, -224
+ xvst $xr0, $a2, -192
+ xvst $xr0, $a2, -160
+ xvst $xr0, $a2, -128
+ xvst $xr0, $a2, -96
+ xvst $xr0, $a2, -64
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ xvst $xr0, $a2, 32
+ xvst $xr0, $a2, 64
+ xvst $xr0, $a2, 96
+ xvst $xr0, $a2, 128
+ xvst $xr0, $a2, 160
+ xvst $xr0, $a2, 192
+ xvst $xr0, $a2, 224
+ xvst $xr0, $a2, 256
+ xvst $xr0, $a2, 288
+ xvst $xr0, $a2, 320
+ xvst $xr0, $a2, 352
+ xvst $xr0, $a2, 384
+ xvst $xr0, $a2, 416
+ xvst $xr0, $a2, 448
+ xvst $xr0, $a2, 480
+ xvst $xr0, $a2, 512
+ xvst $xr0, $a2, 544
+ xvst $xr0, $a2, 576
+ xvst $xr0, $a2, 608
+ xvst $xr0, $a2, 640
+ xvst $xr0, $a2, 672
+ xvst $xr0, $a2, 704
+ xvst $xr0, $a2, 736
+ xvst $xr0, $a2, 768
+ xvst $xr0, $a2, 800
+ xvst $xr0, $a2, 832
+ xvst $xr0, $a2, 864
+ xvst $xr0, $a2, 896
+ xvst $xr0, $a2, 928
+ xvst $xr0, $a2, 960
+ xvst $xr0, $a2, 992
+ addi.w $a3, $a3, 1
+ addi.d $a4, $a4, -1
+ addi.d $a2, $a2, 2047
+ addi.d $a2, $a2, 1
+ bnez $a4, .LBB19_13
# %bb.14: # %.preheader34.i39.preheader
- lu12i.w $a3, 440
- ori $a3, $a3, 3392
- add.d $a3, $a2, $a3
- ori $a4, $zero, 1
- ori $a5, $zero, 256
- .p2align 4, , 16
-.LBB19_15: # %.preheader34.i39
- # =>This Inner Loop Header: Depth=1
- bstrpick.d $a6, $a4, 31, 0
- movgr2fr.d $fa0, $a6
- ffint.d.l $fa0, $fa0
- frecip.d $fa0, $fa0
- xvreplve0.d $xr0, $xr0
- xvst $xr0, $a3, -1024
- xvst $xr0, $a3, -992
- xvst $xr0, $a3, -960
- xvst $xr0, $a3, -928
- xvst $xr0, $a3, -896
- xvst $xr0, $a3, -864
- xvst $xr0, $a3, -832
- xvst $xr0, $a3, -800
- xvst $xr0, $a3, -768
- xvst $xr0, $a3, -736
- xvst $xr0, $a3, -704
- xvst $xr0, $a3, -672
- xvst $xr0, $a3, -640
- xvst $xr0, $a3, -608
- xvst $xr0, $a3, -576
- xvst $xr0, $a3, -544
- xvst $xr0, $a3, -512
- xvst $xr0, $a3, -480
- xvst $xr0, $a3, -448
- xvst $xr0, $a3, -416
- xvst $xr0, $a3, -384
- xvst $xr0, $a3, -352
- xvst $xr0, $a3, -320
- xvst $xr0, $a3, -288
- xvst $xr0, $a3, -256
- xvst $xr0, $a3, -224
- xvst $xr0, $a3, -192
- xvst $xr0, $a3, -160
- xvst $xr0, $a3, -128
- xvst $xr0, $a3, -96
- xvst $xr0, $a3, -64
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- xvst $xr0, $a3, 32
- xvst $xr0, $a3, 64
- xvst $xr0, $a3, 96
- xvst $xr0, $a3, 128
- xvst $xr0, $a3, 160
- xvst $xr0, $a3, 192
- xvst $xr0, $a3, 224
- xvst $xr0, $a3, 256
- xvst $xr0, $a3, 288
- xvst $xr0, $a3, 320
- xvst $xr0, $a3, 352
- xvst $xr0, $a3, 384
- xvst $xr0, $a3, 416
- xvst $xr0, $a3, 448
- xvst $xr0, $a3, 480
- xvst $xr0, $a3, 512
- xvst $xr0, $a3, 544
- xvst $xr0, $a3, 576
- xvst $xr0, $a3, 608
- xvst $xr0, $a3, 640
- xvst $xr0, $a3, 672
- xvst $xr0, $a3, 704
- xvst $xr0, $a3, 736
- xvst $xr0, $a3, 768
- xvst $xr0, $a3, 800
- xvst $xr0, $a3, 832
- xvst $xr0, $a3, 864
- xvst $xr0, $a3, 896
- xvst $xr0, $a3, 928
- xvst $xr0, $a3, 960
- xvst $xr0, $a3, 992
- addi.w $a4, $a4, 1
- addi.d $a5, $a5, -1
- addi.d $a3, $a3, 2047
- addi.d $a3, $a3, 1
- bnez $a5, .LBB19_15
-# %bb.16: # %.preheader34.i47.preheader
- lu12i.w $a3, 568
- ori $a3, $a3, 3488
- add.d $a2, $a2, $a3
+ lu12i.w $a2, 440
+ ori $a2, $a2, 3392
+ add.d $a2, $a1, $a2
 ori $a3, $zero, 1
 ori $a4, $zero, 256
 .p2align 4, , 16
-.LBB19_17: # %.preheader34.i47
+.LBB19_15: # %.preheader34.i39
 # =>This Inner Loop Header: Depth=1
 bstrpick.d $a5, $a3, 31, 0
 movgr2fr.d $fa0, $a5
@@ -13971,45 +13779,129 @@ set: # @set
 addi.d $a4, $a4, -1
 addi.d $a2, $a2, 2047
 addi.d $a2, $a2, 1
- bnez $a4, .LBB19_17
+ bnez $a4, .LBB19_15
+# %bb.16: # %.preheader34.i47.preheader
+ lu12i.w $a2, 568
+ ori $a2, $a2, 3488
+ add.d $a1, $a1, $a2
+ ori $a2, $zero, 1
+ ori $a3, $zero, 256
+ .p2align 4, , 16
+.LBB19_17: # %.preheader34.i47
+ # =>This Inner Loop Header: Depth=1
+ bstrpick.d $a4, $a2, 31, 0
+ movgr2fr.d $fa0, $a4
+ ffint.d.l $fa0, $fa0
+ frecip.d $fa0, $fa0
+ xvreplve0.d $xr0, $xr0
+ xvst $xr0, $a1, -1024
+ xvst $xr0, $a1, -992
+ xvst $xr0, $a1, -960
+ xvst $xr0, $a1, -928
+ xvst $xr0, $a1, -896
+ xvst $xr0, $a1, -864
+ xvst $xr0, $a1, -832
+ xvst $xr0, $a1, -800
+ xvst $xr0, $a1, -768
+ xvst $xr0, $a1, -736
+ xvst $xr0, $a1, -704
+ xvst $xr0, $a1, -672
+ xvst $xr0, $a1, -640
+ xvst $xr0, $a1, -608
+ xvst $xr0, $a1, -576
+ xvst $xr0, $a1, -544
+ xvst $xr0, $a1, -512
+ xvst $xr0, $a1, -480
+ xvst $xr0, $a1, -448
+ xvst $xr0, $a1, -416
+ xvst $xr0, $a1, -384
+ xvst $xr0, $a1, -352
+ xvst $xr0, $a1, -320
+ xvst $xr0, $a1, -288
+ xvst $xr0, $a1, -256
+ xvst $xr0, $a1, -224
+ xvst $xr0, $a1, -192
+ xvst $xr0, $a1, -160
+ xvst $xr0, $a1, -128
+ xvst $xr0, $a1, -96
+ xvst $xr0, $a1, -64
+ xvst $xr0, $a1, -32
+ xvst $xr0, $a1, 0
+ xvst $xr0, $a1, 32
+ xvst $xr0, $a1, 64
+ xvst $xr0, $a1, 96
+ xvst $xr0, $a1, 128
+ xvst $xr0, $a1, 160
+ xvst $xr0, $a1, 192
+ xvst $xr0, $a1, 224
+ xvst $xr0, $a1, 256
+ xvst $xr0, $a1, 288
+ xvst $xr0, $a1, 320
+ xvst $xr0, $a1, 352
+ xvst $xr0, $a1, 384
+ xvst $xr0, $a1, 416
+ xvst $xr0, $a1, 448
+ xvst $xr0, $a1, 480
+ xvst $xr0, $a1, 512
+ xvst $xr0, $a1, 544
+ xvst $xr0, $a1, 576
+ xvst $xr0, $a1, 608
+ xvst $xr0, $a1, 640
+ xvst $xr0, $a1, 672
+ xvst $xr0, $a1, 704
+ xvst $xr0, $a1, 736
+ xvst $xr0, $a1, 768
+ xvst $xr0, $a1, 800
+ xvst $xr0, $a1, 832
+ xvst $xr0, $a1, 864
+ xvst $xr0, $a1, 896
+ xvst $xr0, $a1, 928
+ xvst $xr0, $a1, 960
+ xvst $xr0, $a1, 992
+ addi.w $a2, $a2, 1
+ addi.d $a3, $a3, -1
+ addi.d $a1, $a1, 2047
+ addi.d $a1, $a1, 1
+ bnez $a3, .LBB19_17
# %bb.18: # %vector.body119.preheader
- pcalau12i $a2, %pc_hi20(.LCPI19_0)
- xvld $xr0, $a2, %pc_lo12(.LCPI19_0)
- pcalau12i $a2, %pc_hi20(.LCPI19_1)
- xvld $xr1, $a2, %pc_lo12(.LCPI19_1)
- pcalau12i $a2, %pc_hi20(indx)
- addi.d $a2, $a2, %pc_lo12(indx)
- ori $a1, $a1, 3328
+ pcalau12i $a1, %pc_hi20(.LCPI19_0)
+ xvld $xr0, $a1, %pc_lo12(.LCPI19_0)
+ pcalau12i $a1, %pc_hi20(.LCPI19_1)
+ xvld $xr1, $a1, %pc_lo12(.LCPI19_1)
+ pcalau12i $a1, %pc_hi20(indx)
+ addi.d $a1, $a1, %pc_lo12(indx)
+ ori $a0, $a0, 3328
 xvrepli.w $xr2, 3
 .p2align 4, , 16
.LBB19_19: # %vector.body119
 # =>This Inner Loop Header: Depth=1
- xvpickve2gr.d $a3, $xr1, 0
- xvinsgr2vr.w $xr3, $a3, 0
- xvpickve2gr.d $a3, $xr1, 1
- xvinsgr2vr.w $xr3, $a3, 1
- xvpickve2gr.d $a3, $xr1, 2
- xvinsgr2vr.w $xr3, $a3, 2
- xvpickve2gr.d $a3, $xr1, 3
- xvinsgr2vr.w $xr3, $a3, 3
- xvpickve2gr.d $a3, $xr0, 0
- xvinsgr2vr.w $xr3, $a3, 4
- xvpickve2gr.d $a3, $xr0, 1
- xvinsgr2vr.w $xr3, $a3, 5
- xvpickve2gr.d $a3, $xr0, 2
- xvinsgr2vr.w $xr3, $a3, 6
- xvpickve2gr.d $a3, $xr0, 3
- xvinsgr2vr.w $xr3, $a3, 7
+ xvpickve2gr.d $a2, $xr1, 0
+ xvinsgr2vr.w $xr3, $a2, 0
+ xvpickve2gr.d $a2, $xr1, 1
+ xvinsgr2vr.w $xr3, $a2, 1
+ xvpickve2gr.d $a2, $xr1, 2
+ xvinsgr2vr.w $xr3, $a2, 2
+ xvpickve2gr.d $a2, $xr1, 3
+ xvinsgr2vr.w $xr3, $a2, 3
+ xvpickve2gr.d $a2, $xr0, 0
+ xvinsgr2vr.w $xr3, $a2, 4
+ xvpickve2gr.d $a2, $xr0, 1
+ xvinsgr2vr.w $xr3, $a2, 5
+ xvpickve2gr.d $a2, $xr0, 2
+ xvinsgr2vr.w $xr3, $a2, 6
+ xvpickve2gr.d $a2, $xr0, 3
+ xvinsgr2vr.w $xr3, $a2, 7
 xvaddi.wu $xr3, $xr3, 1
 xvand.v $xr3, $xr3, $xr2
 xvaddi.wu $xr3, $xr3, 1
- xvst $xr3, $a2, 0
+ xvst $xr3, $a1, 0
 xvaddi.du $xr1, $xr1, 8
 xvaddi.du $xr0, $xr0, 8
- addi.d $a1, $a1, -8
- addi.d $a2, $a2, 32
- bnez $a1, .LBB19_19
+ addi.d $a0, $a0, -8
+ addi.d $a1, $a1, 32
+ bnez $a0, .LBB19_19
# %bb.20: # %middle.block122
+ lu52i.d $a0, $zero, 1023
 st.d $a0, $s0, 0
 lu52i.d $a0, $zero, 1024
 st.d $a0, $fp, 0
diff --git a/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s
index 4ceac475..71fa2209 100644
--- a/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s
+++ b/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s
@@ -1900,8 +1900,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_151: # %vector.body5756
 # =>This Inner Loop Header: Depth=1
@@ -2071,8 +2070,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_161: # %vector.body5742
 # =>This Inner Loop Header: Depth=1
@@ -2128,8 +2126,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_165: # %vector.body5728
 # =>This Inner Loop Header: Depth=1
@@ -2289,8 +2286,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_173: # %vector.body5688
 # =>This Inner Loop Header: Depth=1
@@ -2437,8 +2433,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(global_data+32)
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_181: # %vector.body5682
 # =>This Inner Loop Header: Depth=1
@@ -2454,8 +2449,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_183: # %vector.body5670
 # =>This Inner Loop Header: Depth=1
@@ -2518,8 +2512,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB5_187: # %.preheader.i1144
 # =>This Inner Loop Header: Depth=1
@@ -2615,8 +2608,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_191: # %vector.body5642
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -2670,8 +2662,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_195: # %vector.body5628
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -2732,8 +2723,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_199: # %vector.body5604
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -2817,8 +2807,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_207: # %vector.body5580
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -2903,8 +2892,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_215: # %.preheader.i1220
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -2947,8 +2935,7 @@ init: # @init
 ori $a1, $a1, 1808
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 258048
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -3265
.LBB5_217: # %.preheader.i1227
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -2991,8 +2978,7 @@ init: # @init
 ori $a1, $a1, 1888
 add.d $a0, $a0, $a1
 ori $a1, $zero, 256
- lu12i.w $a2, 262144
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -3264
.LBB5_219: # %.preheader.i1234
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -512
@@ -3038,8 +3024,7 @@ init: # @init
 ori $a1, $a1, 1808
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_221: # %.preheader.i1241
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -3166,8 +3151,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_227: # %vector.body5518
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -3264,8 +3248,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 262144
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -3264
.LBB5_235: # %vector.body5500
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -3278,8 +3261,7 @@ init: # @init
 ori $a1, $a1, 2128
 add.d $a1, $fp, $a1
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_237: # %vector.body5506
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -3306,8 +3288,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_241: # %vector.body5486
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3362,8 +3343,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_245: # %.preheader.i1309
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -3458,8 +3438,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(array+32)
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_251: # %vector.body5454
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -3522,8 +3501,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_255: # %vector.body5440
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3577,8 +3555,7 @@ init: # @init
 addi.d $a0, $fp, 32
 lu12i.w $s0, 7
 ori $a1, $s0, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_259: # %vector.body5416
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -3672,35 +3649,35 @@ init: # @init
.LBB5_266: # %vector.body5379.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
+ addi.d $a2, $a0, 32
 lu12i.w $a1, 7
- ori $a4, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ ori $a3, $a1, 3328
+ xvldi $xr0, -1424
.LBB5_267: # %vector.body5379
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_267
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_267
# %bb.268: # %vector.body5385.preheader
- lu12i.w $a3, 31
- ori $a4, $a3, 1048
- add.d $a5, $a0, $a4
- lu12i.w $a4, 3
- ori $a6, $a4, 3712
+ lu12i.w $a2, 31
+ ori $a3, $a2, 1048
+ add.d $a4, $a0, $a3
+ lu12i.w $a3, 3
+ ori $a5, $a3, 3712
+ lu12i.w $a6, 260096
.LBB5_269: # %vector.body5385
 # =>This Inner Loop Header: Depth=1
- st.w $a2, $a5, -8
- st.w $a2, $a5, 0
- addi.d $a6, $a6, -2
- addi.d $a5, $a5, 16
- bnez $a6, .LBB5_269
+ st.w $a6, $a4, -8
+ st.w $a6, $a4, 0
+ addi.d $a5, $a5, -2
+ addi.d $a4, $a4, 16
+ bnez $a5, .LBB5_269
# %bb.270: # %vector.body5391.preheader
- ori $a2, $a3, 1052
+ ori $a2, $a2, 1052
 add.d $a2, $a0, $a2
- ori $a3, $a4, 3712
+ ori $a3, $a3, 3712
 lu12i.w $a4, -264192
 lu32i.d $a4, 0
.LBB5_271: # %vector.body5391
@@ -3779,8 +3756,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_279: # %vector.body5361
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3845,8 +3821,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_285: # %vector.body5347
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3900,8 +3875,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_289: # %vector.body5333
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3955,8 +3929,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_293: # %vector.body5319
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4010,8 +3983,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_297: # %vector.body5305
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4065,8 +4037,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_301: # %vector.body5291
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4120,8 +4091,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_305: # %vector.body5273
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4193,8 +4163,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_311: # %vector.body5249
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -4309,8 +4278,7 @@ init: # @init
 ori $a2, $a2, 1072
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_321: # %vector.body5231
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4362,8 +4330,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_327: # %vector.body5201
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -4460,8 +4427,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_335: # %vector.body5189
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -4489,8 +4455,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_339: # %.preheader.i1541
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -4585,8 +4550,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_343: # %.preheader.i1556
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -4795,8 +4759,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_354: # %.preheader.i1594
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -4941,8 +4904,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_360: # %vector.body5091
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -5098,8 +5060,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_370: # %vector.body5067
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -5210,8 +5171,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_386: # %vector.body5019
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5308,8 +5268,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_394: # %vector.body5001
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5358,8 +5317,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_400: # %vector.body4977
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5456,8 +5414,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_408: # %vector.body4965
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5484,8 +5441,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_412: # %vector.body4941
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -5559,8 +5515,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_420: # %vector.body4935
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -5582,8 +5537,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_422: # %vector.body4929
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -5598,8 +5552,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_424: # %vector.body4911
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5612,8 +5565,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 262144
- xvreplgr2vr.w $xr1, $a3
+ xvldi $xr1, -3264
.LBB5_426: # %.preheader.i1789
 # =>This Inner Loop Header: Depth=1
 xvst $xr1, $a1, -512
@@ -5700,8 +5652,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_430: # %vector.body4893
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -5714,8 +5665,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 262144
- xvreplgr2vr.w $xr1, $a3
+ xvldi $xr1, -3264
.LBB5_432: # %.preheader.i1807
 # =>This Inner Loop Header: Depth=1
 xvst $xr1, $a1, -512
@@ -5944,8 +5894,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_444: # %vector.body4839
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6048,8 +5997,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_452: # %vector.body4821
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6114,8 +6062,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_458: # %vector.body4791
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6190,8 +6137,7 @@ init: # @init
 ori $a2, $a2, 160
 add.d $a0, $a0, $a2
 ori $a1, $a1, 3328
- lu12i.w $a2, 262144
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -3264
.LBB5_466: # %vector.body4815
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -6206,8 +6152,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_468: # %vector.body4761
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6293,8 +6238,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_478: # %vector.body4737
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -6372,8 +6316,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_486: # %.preheader.i1949
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -6505,8 +6448,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_492: # %vector.body4695
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6596,8 +6538,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_500: # %vector.body4659
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6622,8 +6563,7 @@ init: # @init
 ori $a2, $a2, 3632
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu12i.w $a4, -264192
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1296
.LBB5_504: # %vector.body4671
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6713,8 +6653,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, -264192
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1296
.LBB5_512: # %vector.body4623
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6726,8 +6665,7 @@ init: # @init
 addu16i.d $a2, $a0, 1
 addi.d $a2, $a2, -1504
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_514: # %vector.body4629
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6829,8 +6767,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, -264192
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1296
.LBB5_524: # %vector.body4587
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6842,8 +6779,7 @@ init: # @init
 addu16i.d $a2, $a0, 1
 addi.d $a2, $a2, -1504
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_526: # %vector.body4593
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -6945,8 +6881,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_536: # %vector.body4557
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7048,8 +6983,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_546: # %vector.body4539
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7114,8 +7048,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_552: # %vector.body4521
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -7187,8 +7120,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_558: # %vector.body4509
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -7222,8 +7154,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_562: # %vector.body4503
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -7245,8 +7176,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_564: # %vector.body4497
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -7288,8 +7218,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_568: # %.preheader.i2154
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -7892,8 +7821,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_609: # %vector.body4369
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -7913,8 +7841,7 @@ init: # @init
 addi.d $a0, $s0, 32
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_612: # %vector.body4363
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -7945,8 +7872,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_615: # %vector.body4333
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -8262,8 +8188,7 @@ init: # @init
 ori $a1, $a1, 1808
 add.d $a0, $a0, $a1
 ori $a1, $zero, 256
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_640: # %.preheader.i2360
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -512
@@ -8305,30 +8230,29 @@ init: # @init
.LBB5_641: # %vector.body4275.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
- lu12i.w $a2, 7
- ori $a4, $a2, 3328
- lu12i.w $a1, 260096
- xvreplgr2vr.w $xr0, $a1
+ addi.d $a2, $a0, 32
+ lu12i.w $a1, 7
+ ori $a3, $a1, 3328
+ xvldi $xr0, -1424
.LBB5_642: # %vector.body4275
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_642
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_642
# %bb.643: # %vector.body4281.preheader
- lu12i.w $a3, 31
- ori $a3, $a3, 1072
- add.d $a3, $a0, $a3
- ori $a2, $a2, 3328
+ lu12i.w $a2, 31
+ ori $a2, $a2, 1072
+ add.d $a2, $a0, $a2
+ ori $a1, $a1, 3328
.LBB5_644: # %vector.body4281
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a2, $a2, -16
- addi.d $a3, $a3, 64
- bnez $a2, .LBB5_644
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a1, $a1, -16
+ addi.d $a2, $a2, 64
+ bnez $a1, .LBB5_644
 b .LBB5_653
.LBB5_645: # %vector.body4263.preheader
 pcalau12i $a0, %pc_hi20(global_data)
@@ -8385,34 +8309,34 @@ init: # @init
.LBB5_649: # %vector.body4251.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
- lu12i.w $a2, 7
- ori $a4, $a2, 3328
- lu12i.w $a1, 260096
- xvreplgr2vr.w $xr0, $a1
+ addi.d $a2, $a0, 32
+ lu12i.w $a1, 7
+ ori $a3, $a1, 3328
+ xvldi $xr0, -1424
.LBB5_650: # %vector.body4251
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_650
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_650
# %bb.651: # %vector.body4257.preheader
- lu12i.w $a3, 31
- ori $a3, $a3, 1072
- add.d $a3, $a0, $a3
- ori $a2, $a2, 3328
+ lu12i.w $a2, 31
+ ori $a2, $a2, 1072
+ add.d $a2, $a0, $a2
+ ori $a1, $a1, 3328
.LBB5_652: # %vector.body4257
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a2, $a2, -16
- addi.d $a3, $a3, 64
- bnez $a2, .LBB5_652
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a1, $a1, -16
+ addi.d $a2, $a2, 64
+ bnez $a1, .LBB5_652
.LBB5_653: # %set1d.exit2374
- lu12i.w $a2, 62
- ori $a2, $a2, 2096
- stx.w $a1, $a0, $a2
+ lu12i.w $a1, 62
+ ori $a1, $a1, 2096
+ lu12i.w $a2, 260096
+ stx.w $a2, $a0, $a1
 b .LBB5_573
.LBB5_654: # %vector.body4233.preheader
 pcalau12i $a0, %pc_hi20(global_data)
@@ -8420,8 +8344,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_655: # %vector.body4233
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -8486,8 +8409,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_661: # %vector.body4215
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -8559,8 +8481,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_667: # %vector.body4191
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -8638,8 +8559,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_675: # %.preheader.i2443
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -8784,8 +8704,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a3, $a2, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_681: # %vector.body4151
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -8893,8 +8812,7 @@ init: # @init
 pcalau12i $a1, %pc_hi20(global_data+128048)
 addi.d $a1, $a1, %pc_lo12(global_data+128048)
 ori $a0, $a0, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
.LBB5_691: # %vector.body4145
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -8908,8 +8826,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(array+32)
 lu12i.w $a1, 7
 ori $a2, $a1, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_693: # %vector.body4123
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -9008,8 +8925,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(array+32)
 lu12i.w $a1, 7
 ori $a2, $a1, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_699: # %vector.body4101
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -9062,8 +8978,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_703: # %vector.body4087
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9117,8 +9032,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_707: # %vector.body4073
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9172,8 +9086,7 @@ init: # @init
 addi.d $a0, $fp, 32
 lu12i.w $s0, 7
 ori $a1, $s0, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
 xvst $xr0, $sp, 16 # 32-byte Folded Spill
.LBB5_711: # %vector.body4043
 # =>This Inner Loop Header: Depth=1
@@ -9238,8 +9151,7 @@ init: # @init
 ori $a0, $a0, 3184
 add.d $a0, $fp, $a0
 ori $a2, $s0, 3328
- lu12i.w $a3, -264192
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1296
.LBB5_717: # %vector.body4061
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -9274,8 +9186,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_721: # %vector.body4013
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9390,8 +9301,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_731: # %vector.body3995
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9517,8 +9427,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_741: # %vector.body3971
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -9596,8 +9505,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_747: # %vector.body3933
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9686,8 +9594,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_757: # %vector.body3909
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9777,8 +9684,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_765: # %vector.body3891
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -9850,8 +9756,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_771: # %vector.body3873
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -9916,8 +9821,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a3, $a2, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_777: # %vector.body3861
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -9964,8 +9868,7 @@ init: # @init
 add.d $a0, $fp, $a0
 lu12i.w $a1, 7
 ori $a2, $a1, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_781: # %vector.body3847
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -10026,8 +9929,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_785: # %vector.body3829
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -10228,8 +10130,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
.LBB5_799: # %vector.body3785
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -10294,8 +10195,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_805: # %vector.body3767
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10600,8 +10500,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_821: # %vector.body3715
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10628,8 +10527,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_825: # %vector.body3697
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10694,8 +10592,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_831: # %vector.body3683
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10789,8 +10686,7 @@ init: # @init
 ori $a2, $a2, 1072
 add.d $a2, $a1, $a2
 ori $a3, $a0, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_837: # %vector.body3671
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10803,8 +10699,7 @@ init: # @init
 ori $a2, $a2, 2128
 add.d $a1, $a1, $a2
 ori $a0, $a0, 3328
- lu12i.w $a2, -264192
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1296
.LBB5_839: # %vector.body3677
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -10819,8 +10714,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
.LBB5_841: # %vector.body3647
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10833,8 +10727,7 @@ init: # @init
 ori $a2, $a2, 1072
 add.d $a2, $a0, $a2
 ori $a3, $a1, 3328
- lu12i.w $a4, 262144
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -3264
.LBB5_843: # %vector.body3653
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -10847,8 +10740,7 @@ init: # @init
 ori $a2, $a2, 2128
 add.d $a0, $a0, $a2
 ori $a1, $a1, 3328
- lu12i.w $a2, 258048
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -3265
.LBB5_845: # %vector.body3659
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -13048,181 +12940,129 @@ set: # @set
 bnez $a2, .LBB19_1
# %bb.2: # %vector.body67.preheader
 pcalau12i $a0, %pc_hi20(global_data)
- addi.d $a2, $a0, %pc_lo12(global_data)
- addi.d $a3, $a2, 32
- lu12i.w $a1, 7
- ori $a4, $a1, 3328
- lu12i.w $a0, 260096
- xvreplgr2vr.w $xr0, $a0
+ addi.d $a1, $a0, %pc_lo12(global_data)
+ addi.d $a2, $a1, 32
+ lu12i.w $a0, 7
+ ori $a3, $a0, 3328
+ xvldi $xr0, -1424
 .p2align 4, , 16
.LBB19_3: # %vector.body67
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_3
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_3
# %bb.4: # %vector.body73.preheader
- ori $a3, $s2, 1072
- add.d $a3, $a2, $a3
- ori $a4, $a1, 3328
+ ori $a2, $s2, 1072
+ add.d $a2, $a1, $a2
+ ori $a3, $a0, 3328
 .p2align 4, , 16
.LBB19_5: # %vector.body73
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB19_5
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB19_5
#
%bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB19_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB19_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB19_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB19_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB19_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, 
$a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB19_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB19_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB19_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB19_17: # %.preheader34.i47 +.LBB19_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13265,45 +13105,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB19_17 + bnez $a4, .LBB19_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB19_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB19_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI19_0) - xvld $xr0, $a2, %pc_lo12(.LCPI19_0) - pcalau12i $a2, %pc_hi20(.LCPI19_1) - xvld $xr1, $a2, %pc_lo12(.LCPI19_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI19_0) + xvld $xr0, $a1, %pc_lo12(.LCPI19_0) + pcalau12i $a1, %pc_hi20(.LCPI19_1) + xvld $xr1, $a1, %pc_lo12(.LCPI19_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB19_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, 
$xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB19_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB19_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-dbl/CMakeFiles/IndirectAddressing-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-dbl/CMakeFiles/IndirectAddressing-dbl.dir/tsc.s index f1b687ef..07b6167e 100644 --- a/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-dbl/CMakeFiles/IndirectAddressing-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-dbl/CMakeFiles/IndirectAddressing-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, 
$a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 
ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ 
-4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 
lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, 
$zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 
.LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d 
$xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, 
$a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12878,245 +12770,161 @@ set: # @set bnez $a2, .LBB14_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB14_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, 
-8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB14_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB14_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + 
xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB14_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB14_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB14_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB14_17: # %.preheader34.i47 +.LBB14_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13191,45 +12999,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB14_17 + bnez $a4, .LBB14_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB14_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, 
$a1, -896
+ xvst $xr0, $a1, -864
+ xvst $xr0, $a1, -832
+ xvst $xr0, $a1, -800
+ xvst $xr0, $a1, -768
+ xvst $xr0, $a1, -736
+ xvst $xr0, $a1, -704
+ xvst $xr0, $a1, -672
+ xvst $xr0, $a1, -640
+ xvst $xr0, $a1, -608
+ xvst $xr0, $a1, -576
+ xvst $xr0, $a1, -544
+ xvst $xr0, $a1, -512
+ xvst $xr0, $a1, -480
+ xvst $xr0, $a1, -448
+ xvst $xr0, $a1, -416
+ xvst $xr0, $a1, -384
+ xvst $xr0, $a1, -352
+ xvst $xr0, $a1, -320
+ xvst $xr0, $a1, -288
+ xvst $xr0, $a1, -256
+ xvst $xr0, $a1, -224
+ xvst $xr0, $a1, -192
+ xvst $xr0, $a1, -160
+ xvst $xr0, $a1, -128
+ xvst $xr0, $a1, -96
+ xvst $xr0, $a1, -64
+ xvst $xr0, $a1, -32
+ xvst $xr0, $a1, 0
+ xvst $xr0, $a1, 32
+ xvst $xr0, $a1, 64
+ xvst $xr0, $a1, 96
+ xvst $xr0, $a1, 128
+ xvst $xr0, $a1, 160
+ xvst $xr0, $a1, 192
+ xvst $xr0, $a1, 224
+ xvst $xr0, $a1, 256
+ xvst $xr0, $a1, 288
+ xvst $xr0, $a1, 320
+ xvst $xr0, $a1, 352
+ xvst $xr0, $a1, 384
+ xvst $xr0, $a1, 416
+ xvst $xr0, $a1, 448
+ xvst $xr0, $a1, 480
+ xvst $xr0, $a1, 512
+ xvst $xr0, $a1, 544
+ xvst $xr0, $a1, 576
+ xvst $xr0, $a1, 608
+ xvst $xr0, $a1, 640
+ xvst $xr0, $a1, 672
+ xvst $xr0, $a1, 704
+ xvst $xr0, $a1, 736
+ xvst $xr0, $a1, 768
+ xvst $xr0, $a1, 800
+ xvst $xr0, $a1, 832
+ xvst $xr0, $a1, 864
+ xvst $xr0, $a1, 896
+ xvst $xr0, $a1, 928
+ xvst $xr0, $a1, 960
+ xvst $xr0, $a1, 992
+ addi.w $a2, $a2, 1
+ addi.d $a3, $a3, -1
+ addi.d $a1, $a1, 2047
+ addi.d $a1, $a1, 1
+ bnez $a3, .LBB14_17
 # %bb.18: # %vector.body119.preheader
- pcalau12i $a2, %pc_hi20(.LCPI14_0)
- xvld $xr0, $a2, %pc_lo12(.LCPI14_0)
- pcalau12i $a2, %pc_hi20(.LCPI14_1)
- xvld $xr1, $a2, %pc_lo12(.LCPI14_1)
- pcalau12i $a2, %pc_hi20(indx)
- addi.d $a2, $a2, %pc_lo12(indx)
- ori $a1, $a1, 3328
+ pcalau12i $a1, %pc_hi20(.LCPI14_0)
+ xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
+ pcalau12i $a1, %pc_hi20(.LCPI14_1)
+ xvld $xr1, $a1, %pc_lo12(.LCPI14_1)
+ pcalau12i $a1, %pc_hi20(indx)
+ addi.d $a1, $a1, %pc_lo12(indx)
+ ori $a0, $a0, 3328
 xvrepli.w $xr2, 3
 .p2align 4, , 16
 .LBB14_19: # %vector.body119
 # =>This Inner Loop Header: Depth=1
- xvpickve2gr.d $a3, $xr1, 0
- xvinsgr2vr.w $xr3, $a3, 0
- xvpickve2gr.d $a3, $xr1, 1
- xvinsgr2vr.w $xr3, $a3, 1
- xvpickve2gr.d $a3, $xr1, 2
- xvinsgr2vr.w $xr3, $a3, 2
- xvpickve2gr.d $a3, $xr1, 3
- xvinsgr2vr.w $xr3, $a3, 3
- xvpickve2gr.d $a3, $xr0, 0
- xvinsgr2vr.w $xr3, $a3, 4
- xvpickve2gr.d $a3, $xr0, 1
- xvinsgr2vr.w $xr3, $a3, 5
- xvpickve2gr.d $a3, $xr0, 2
- xvinsgr2vr.w $xr3, $a3, 6
- xvpickve2gr.d $a3, $xr0, 3
- xvinsgr2vr.w $xr3, $a3, 7
+ xvpickve2gr.d $a2, $xr1, 0
+ xvinsgr2vr.w $xr3, $a2, 0
+ xvpickve2gr.d $a2, $xr1, 1
+ xvinsgr2vr.w $xr3, $a2, 1
+ xvpickve2gr.d $a2, $xr1, 2
+ xvinsgr2vr.w $xr3, $a2, 2
+ xvpickve2gr.d $a2, $xr1, 3
+ xvinsgr2vr.w $xr3, $a2, 3
+ xvpickve2gr.d $a2, $xr0, 0
+ xvinsgr2vr.w $xr3, $a2, 4
+ xvpickve2gr.d $a2, $xr0, 1
+ xvinsgr2vr.w $xr3, $a2, 5
+ xvpickve2gr.d $a2, $xr0, 2
+ xvinsgr2vr.w $xr3, $a2, 6
+ xvpickve2gr.d $a2, $xr0, 3
+ xvinsgr2vr.w $xr3, $a2, 7
 xvaddi.wu $xr3, $xr3, 1
 xvand.v $xr3, $xr3, $xr2
 xvaddi.wu $xr3, $xr3, 1
- xvst $xr3, $a2, 0
+ xvst $xr3, $a1, 0
 xvaddi.du $xr1, $xr1, 8
 xvaddi.du $xr0, $xr0, 8
- addi.d $a1, $a1, -8
- addi.d $a2, $a2, 32
- bnez $a1, .LBB14_19
+ addi.d $a0, $a0, -8
+ addi.d $a1, $a1, 32
+ bnez $a0, .LBB14_19
 # %bb.20: # %middle.block122
+ lu52i.d $a0, $zero, 1023
 st.d $a0, $s0, 0
 lu52i.d $a0, $zero, 1024
 st.d $a0, $fp, 0
diff --git a/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s
index 24446b6b..43305a52 100644
--- a/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s
+++ b/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s
@@ -1900,8 +1900,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_151: # %vector.body5756
 # =>This Inner Loop Header: Depth=1
@@ -2071,8 +2070,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_161: # %vector.body5742
 # =>This Inner Loop Header: Depth=1
@@ -2128,8 +2126,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_165: # %vector.body5728
 # =>This Inner Loop Header: Depth=1
@@ -2289,8 +2286,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_173: # %vector.body5688
 # =>This Inner Loop Header: Depth=1
@@ -2437,8 +2433,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(global_data+32)
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_181: # %vector.body5682
 # =>This Inner Loop Header: Depth=1
@@ -2454,8 +2449,7 @@ init: # @init
 addi.d $a1, $a0, 32
 lu12i.w $a2, 7
 ori $a2, $a2, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_183: # %vector.body5670
 # =>This Inner Loop Header: Depth=1
@@ -2518,8 +2512,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .p2align 4, , 16
 .LBB5_187: # %.preheader.i1144
 # =>This Inner Loop Header: Depth=1
@@ -2615,8 +2608,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .LBB5_191: # %vector.body5642
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -2670,8 +2662,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .LBB5_195: # %vector.body5628
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -2732,8 +2723,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_199: # %vector.body5604
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -2817,8 +2807,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_207: # %vector.body5580
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -2903,8 +2892,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_215: # %.preheader.i1220
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -2947,8 +2935,7 @@ init: # @init
 ori $a1, $a1, 1808
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 258048
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -3265
 .LBB5_217: # %.preheader.i1227
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -2991,8 +2978,7 @@ init: # @init
 ori $a1, $a1, 1888
 add.d $a0, $a0, $a1
 ori $a1, $zero, 256
- lu12i.w $a2, 262144
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -3264
 .LBB5_219: # %.preheader.i1234
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -512
@@ -3038,8 +3024,7 @@ init: # @init
 ori $a1, $a1, 1808
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_221: # %.preheader.i1241
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -3166,8 +3151,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_227: # %vector.body5518
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -3264,8 +3248,7 @@ init: # @init
 add.d $a1, $fp, $a0
 lu12i.w $a0, 7
 ori $a2, $a0, 3328
- lu12i.w $a3, 262144
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -3264
 .LBB5_235: # %vector.body5500
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -3278,8 +3261,7 @@ init: # @init
 ori $a1, $a1, 2128
 add.d $a1, $fp, $a1
 ori $a2, $a0, 3328
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_237: # %vector.body5506
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -32
@@ -3306,8 +3288,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .LBB5_241: # %vector.body5486
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3362,8 +3343,7 @@ init: # @init
 ori $a1, $a1, 1728
 add.d $a1, $a0, $a1
 ori $a2, $zero, 256
- lu12i.w $a3, 260096
- xvreplgr2vr.w $xr0, $a3
+ xvldi $xr0, -1424
 .LBB5_245: # %.preheader.i1309
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a1, -512
@@ -3458,8 +3438,7 @@ init: # @init
 addi.d $a0, $a0, %pc_lo12(array+32)
 lu12i.w $a1, 7
 ori $a1, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
 .LBB5_251: # %vector.body5454
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -3522,8 +3501,7 @@ init: # @init
 addi.d $a2, $a0, 32
 lu12i.w $a1, 7
 ori $a3, $a1, 3328
- lu12i.w $a4, 260096
- xvreplgr2vr.w $xr0, $a4
+ xvldi $xr0, -1424
 .LBB5_255: # %vector.body5440
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a2, -32
@@ -3577,8 +3555,7 @@ init: # @init
 addi.d $a0, $fp, 32
 lu12i.w $s0, 7
 ori $a1, $s0, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ xvldi $xr0, -1424
 .LBB5_259: # %vector.body5416
 # =>This Inner Loop Header: Depth=1
 xvst $xr0, $a0, -32
@@ -3672,35 +3649,35 @@ init: # @init
 .LBB5_266: # %vector.body5379.preheader
 pcalau12i $a0, %pc_hi20(global_data)
 addi.d $a0, $a0, %pc_lo12(global_data)
- addi.d $a3, $a0, 32
+ addi.d $a2, $a0, 32
 lu12i.w $a1, 7
- ori $a4, $a1, 3328
- lu12i.w $a2, 260096
- xvreplgr2vr.w $xr0, $a2
+ ori $a3, $a1, 3328
+ xvldi $xr0, -1424
 .LBB5_267: # %vector.body5379
 # =>This Inner Loop Header: Depth=1
- xvst $xr0, $a3, -32
- xvst $xr0, $a3, 0
- addi.d $a4, $a4, -16
- addi.d $a3, $a3, 64
- bnez $a4, .LBB5_267
+ xvst $xr0, $a2, -32
+ xvst $xr0, $a2, 0
+ addi.d $a3, $a3, -16
+ addi.d $a2, $a2, 64
+ bnez $a3, .LBB5_267
 # %bb.268: # %vector.body5385.preheader
- lu12i.w $a3, 31
- ori $a4, $a3, 1048
- add.d $a5, $a0, $a4
- lu12i.w $a4, 3
- ori $a6, $a4, 3712
+ lu12i.w $a2, 31
+ ori $a3, $a2, 1048
+ add.d $a4, $a0, $a3
+ lu12i.w $a3, 3
+ ori $a5, $a3, 3712
+ lu12i.w $a6, 260096
 .LBB5_269: # %vector.body5385
 # =>This Inner Loop Header: Depth=1
- st.w $a2, $a5, -8
- st.w $a2, $a5, 0
- addi.d $a6, $a6, -2
- addi.d $a5, $a5, 16
- bnez $a6, .LBB5_269
+ st.w $a6, $a4, -8
+ st.w $a6, $a4, 0
+ addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, 
$zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 
32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # 
%vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 
7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # 
%vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12044,181 +11936,129 @@ set: # @set bnez $a2, .LBB14_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB14_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, 
$a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB14_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB14_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB14_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB14_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB14_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB14_13 # %bb.14: # 
%.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB14_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB14_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB14_17: # %.preheader34.i47 +.LBB14_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12261,45 +12101,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB14_17 + bnez $a4, .LBB14_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB14_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB14_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI14_0) - xvld $xr0, $a2, %pc_lo12(.LCPI14_0) - pcalau12i $a2, %pc_hi20(.LCPI14_1) - xvld $xr1, $a2, %pc_lo12(.LCPI14_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI14_0) + xvld $xr0, $a1, %pc_lo12(.LCPI14_0) + pcalau12i $a1, %pc_hi20(.LCPI14_1) + xvld $xr1, $a1, %pc_lo12(.LCPI14_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB14_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 
- xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB14_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB14_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/InductionVariable-dbl/CMakeFiles/InductionVariable-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/InductionVariable-dbl/CMakeFiles/InductionVariable-dbl.dir/tsc.s index bb798452..692f6770 100644 --- a/results/MultiSource/Benchmarks/TSVC/InductionVariable-dbl/CMakeFiles/InductionVariable-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/InductionVariable-dbl/CMakeFiles/InductionVariable-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, 
$a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 
ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 
.LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: 
Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst 
$xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ 
-11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -13391,245 +13283,161 @@ set: # @set bnez $a2, .LBB16_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB16_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_9 + 
xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB16_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB16_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst 
$xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB16_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB16_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB16_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB16_17: # %.preheader34.i47 +.LBB16_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13704,45 +13512,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB16_17 + bnez $a4, .LBB16_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB16_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, 
-832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB16_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI16_0) - xvld $xr0, $a2, %pc_lo12(.LCPI16_0) - pcalau12i $a2, %pc_hi20(.LCPI16_1) - xvld $xr1, $a2, %pc_lo12(.LCPI16_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI16_0) + xvld $xr0, $a1, %pc_lo12(.LCPI16_0) + pcalau12i $a1, %pc_hi20(.LCPI16_1) + xvld $xr1, $a1, %pc_lo12(.LCPI16_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB16_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB16_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB16_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s index 
9afee41a..2c4de1c5 100644 --- a/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - 
xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori 
$a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 
7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # 
%vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # 
%vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # 
%vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # 
@init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12734,181 +12626,129 @@ set: # @set bnez $a2, .LBB16_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB16_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 
3328 .p2align 4, , 16 .LBB16_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB16_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB16_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB16_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 
256 - .p2align 4, , 16 -.LBB16_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB16_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB16_17: # %.preheader34.i47 +.LBB16_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12951,45 +12791,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB16_17 + bnez $a4, .LBB16_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB16_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB16_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI16_0) - xvld $xr0, $a2, %pc_lo12(.LCPI16_0) - pcalau12i $a2, %pc_hi20(.LCPI16_1) - xvld $xr1, $a2, %pc_lo12(.LCPI16_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI16_0) + xvld $xr0, $a1, %pc_lo12(.LCPI16_0) + pcalau12i $a1, %pc_hi20(.LCPI16_1) + xvld $xr1, $a1, %pc_lo12(.LCPI16_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB16_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w 
$xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB16_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB16_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s index 1ce8ce81..d73f8082 100644 --- a/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LinearDependence-dbl/CMakeFiles/LinearDependence-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 
+2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # 
@init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop 
Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 
@@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, 
$a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - 
lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w 
$a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi 
$xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11814,8 +11706,7 @@ s000: # @s000 addi.d $s8, $s0, 32 lu12i.w $a0, 7 ori $s7, $a0, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr2, $a0 + xvldi $xr2, -912 pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 312 @@ -12420,8 +12311,7 @@ s1112: # @s1112 pcalau12i $a1, %pc_hi20(global_data) addi.d $fp, $a1, %pc_lo12(global_data) move $s7, $zero - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr1, $a1 + xvldi $xr1, -912 ori $a0, $a0, 2080 add.d $s0, $fp, $a0 ori $a0, $a2, 64 @@ -14226,245 +14116,161 @@ set: # @set bnez $a2, .LBB21_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB21_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_7 # %bb.8: # 
%vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB21_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB21_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + 
xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB21_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB21_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB21_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB21_17: # %.preheader34.i47 +.LBB21_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -14539,45 +14345,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB21_17 + bnez $a4, .LBB21_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, 
$zero, 256 + .p2align 4, , 16 +.LBB21_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB21_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI21_0) - xvld $xr0, $a2, %pc_lo12(.LCPI21_0) - pcalau12i $a2, %pc_hi20(.LCPI21_1) - xvld $xr1, $a2, %pc_lo12(.LCPI21_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI21_0) + xvld $xr0, $a1, %pc_lo12(.LCPI21_0) + pcalau12i $a1, %pc_hi20(.LCPI21_1) + xvld $xr1, $a1, %pc_lo12(.LCPI21_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB21_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB21_19 + addi.d $a0, $a0, -8 + addi.d $a1, 
$a1, 32 + bnez $a0, .LBB21_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s index 57967837..30735398 100644 --- a/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 
+2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori 
$a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 
256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner 
Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 
lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, 
$a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, 
$a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10975,8 +10867,7 @@ s000: # @s000 addi.d $s8, $s0, 32 lu12i.w $a0, 7 ori $s7, $a0, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr2, $a0 + xvldi $xr2, -1424 pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 156 @@ -11637,8 +11528,7 @@ s1112: # @s1112 pcalau12i $a2, %pc_hi20(global_data) addi.d $fp, $a2, %pc_lo12(global_data) move $s8, $zero - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr1, $a2 + xvldi $xr1, -1424 ori $a0, $a0, 1040 add.d $s0, $fp, $a0 ori $a0, $a1, 2096 @@ -13661,181 +13551,129 @@ set: # @set bnez $a2, .LBB21_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB21_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst 
$xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB21_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB21_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB21_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB21_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB21_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst 
$xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB21_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB21_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB21_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB21_17: # %.preheader34.i47 +.LBB21_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13878,45 +13716,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB21_17 + bnez $a4, .LBB21_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB21_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez 
$a3, .LBB21_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI21_0) - xvld $xr0, $a2, %pc_lo12(.LCPI21_0) - pcalau12i $a2, %pc_hi20(.LCPI21_1) - xvld $xr1, $a2, %pc_lo12(.LCPI21_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI21_0) + xvld $xr0, $a1, %pc_lo12(.LCPI21_0) + pcalau12i $a1, %pc_hi20(.LCPI21_1) + xvld $xr1, $a1, %pc_lo12(.LCPI21_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB21_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB21_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB21_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/LoopRerolling-dbl/CMakeFiles/LoopRerolling-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LoopRerolling-dbl/CMakeFiles/LoopRerolling-dbl.dir/tsc.s index 98a035b5..f0245c58 100644 --- a/results/MultiSource/Benchmarks/TSVC/LoopRerolling-dbl/CMakeFiles/LoopRerolling-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LoopRerolling-dbl/CMakeFiles/LoopRerolling-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 
.LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 
xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, 
$a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ 
-7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d 
$a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 
125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ 
init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - 
lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12456,245 +12348,161 @@ set: # @set bnez $a2, .LBB11_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB11_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_7: # 
%vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB11_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB11_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + 
xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB11_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB11_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB11_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB11_17: # %.preheader34.i47 +.LBB11_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 
@@ -12769,45 +12577,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB11_17 + bnez $a4, .LBB11_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB11_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB11_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI11_0) - xvld $xr0, $a2, %pc_lo12(.LCPI11_0) - pcalau12i $a2, %pc_hi20(.LCPI11_1) - xvld $xr1, $a2, %pc_lo12(.LCPI11_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI11_0) + xvld $xr0, $a1, %pc_lo12(.LCPI11_0) + pcalau12i $a1, %pc_hi20(.LCPI11_1) + xvld $xr1, $a1, %pc_lo12(.LCPI11_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB11_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w 
$xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB11_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB11_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s index 964aada7..c03f8d78 100644 --- a/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 
+2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 
64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 
- lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner 
Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 
addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # 
%vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 
ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 
.LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11621,181 +11513,129 @@ set: # @set bnez $a2, .LBB11_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB11_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_3 # %bb.4: # %vector.body73.preheader - ori $a3, 
$s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB11_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB11_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB11_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB11_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB11_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst 
$xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB11_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB11_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB11_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB11_17: # %.preheader34.i47 +.LBB11_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -11838,45 +11678,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB11_17 + bnez $a4, .LBB11_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB11_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB11_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI11_0) - xvld $xr0, $a2, %pc_lo12(.LCPI11_0) - pcalau12i $a2, %pc_hi20(.LCPI11_1) - xvld $xr1, $a2, %pc_lo12(.LCPI11_1) - pcalau12i $a2, 
%pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI11_0) + xvld $xr0, $a1, %pc_lo12(.LCPI11_0) + pcalau12i $a1, %pc_hi20(.LCPI11_1) + xvld $xr1, $a1, %pc_lo12(.LCPI11_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB11_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB11_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB11_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-dbl/CMakeFiles/LoopRestructuring-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-dbl/CMakeFiles/LoopRestructuring-dbl.dir/tsc.s index 8dac74bc..beb07a72 100644 --- a/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-dbl/CMakeFiles/LoopRestructuring-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-dbl/CMakeFiles/LoopRestructuring-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, 
-912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 
ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # 
%.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst 
$xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # 
%vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # 
%vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 
@@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -13554,245 +13446,161 @@ set: # @set bnez $a2, .LBB16_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB16_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, 
.LBB16_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB16_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB16_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, 
$a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB16_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB16_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB16_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB16_17: # %.preheader34.i47 +.LBB16_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13867,45 +13675,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB16_17 + bnez $a4, .LBB16_15 +# 
%bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB16_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB16_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI16_0) - xvld $xr0, $a2, %pc_lo12(.LCPI16_0) - pcalau12i $a2, %pc_hi20(.LCPI16_1) - xvld $xr1, $a2, %pc_lo12(.LCPI16_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI16_0) + xvld $xr0, $a1, %pc_lo12(.LCPI16_0) + pcalau12i $a1, %pc_hi20(.LCPI16_1) + xvld $xr1, $a1, %pc_lo12(.LCPI16_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB16_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 
8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB16_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB16_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s index c62d5a55..f4525bb1 100644 --- a/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - 
xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - 
add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d 
$a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, 
-1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 
31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, 
$fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12768,181 +12660,129 @@ set: # @set bnez $a2, .LBB16_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB16_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 
.LBB16_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB16_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB16_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB16_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB16_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB16_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 
128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB16_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB16_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB16_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB16_17: # %.preheader34.i47 +.LBB16_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12985,45 +12825,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB16_17 + bnez $a4, .LBB16_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB16_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB16_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI16_0) - xvld $xr0, $a2, %pc_lo12(.LCPI16_0) - pcalau12i $a2, %pc_hi20(.LCPI16_1) - xvld $xr1, $a2, %pc_lo12(.LCPI16_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI16_0) + xvld $xr0, $a1, %pc_lo12(.LCPI16_0) + 
pcalau12i $a1, %pc_hi20(.LCPI16_1) + xvld $xr1, $a1, %pc_lo12(.LCPI16_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB16_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB16_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB16_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/NodeSplitting-dbl/CMakeFiles/NodeSplitting-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/NodeSplitting-dbl/CMakeFiles/NodeSplitting-dbl.dir/tsc.s index 59d8d21e..b32d1ccc 100644 --- a/results/MultiSource/Benchmarks/TSVC/NodeSplitting-dbl/CMakeFiles/NodeSplitting-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/NodeSplitting-dbl/CMakeFiles/NodeSplitting-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d 
$a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, 
$a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 
+4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w 
$a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 
- xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # 
%vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi 
$xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - 
ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: 
Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12854,245 +12746,161 @@ set: # @set bnez $a2, .LBB13_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB13_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, 
$a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB13_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB13_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst 
$xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB13_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB13_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB13_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB13_17: # %.preheader34.i47 +.LBB13_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13167,45 +12975,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB13_17 + bnez $a4, .LBB13_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB13_17: # %.preheader34.i47 + # =>This Inner Loop Header: 
Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB13_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI13_0) - xvld $xr0, $a2, %pc_lo12(.LCPI13_0) - pcalau12i $a2, %pc_hi20(.LCPI13_1) - xvld $xr1, $a2, %pc_lo12(.LCPI13_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI13_0) + xvld $xr0, $a1, %pc_lo12(.LCPI13_0) + pcalau12i $a1, %pc_hi20(.LCPI13_1) + xvld $xr1, $a1, %pc_lo12(.LCPI13_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB13_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB13_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB13_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d 
$a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s index 8275b24a..a4e64a98 100644 --- a/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - 
xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w 
$a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 
7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # 
%vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 
+ xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 
lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12138,181 +12030,129 @@ set: # @set bnez $a2, .LBB13_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB13_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 
64 + bnez $a3, .LBB13_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB13_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB13_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w 
$a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB13_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB13_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB13_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB13_17: # %.preheader34.i47 +.LBB13_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12355,45 +12195,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB13_17 + bnez $a4, .LBB13_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB13_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB13_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI13_0) - xvld $xr0, $a2, %pc_lo12(.LCPI13_0) - pcalau12i $a2, %pc_hi20(.LCPI13_1) - xvld $xr1, $a2, %pc_lo12(.LCPI13_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI13_0) + xvld $xr0, $a1, %pc_lo12(.LCPI13_0) + pcalau12i $a1, %pc_hi20(.LCPI13_1) + xvld $xr1, $a1, %pc_lo12(.LCPI13_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB13_19: # %vector.body119 # =>This Inner Loop Header: 
Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB13_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB13_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Packing-dbl/CMakeFiles/Packing-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Packing-dbl/CMakeFiles/Packing-dbl.dir/tsc.s index 229cd3d0..093a05c3 100644 --- a/results/MultiSource/Benchmarks/TSVC/Packing-dbl/CMakeFiles/Packing-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Packing-dbl/CMakeFiles/Packing-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, 
$zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, 
$a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, 
$zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, 
-912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop 
Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # 
%vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, 
$a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, 
$a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12304,245 +12196,161 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, 
.LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 
224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12617,45 +12425,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst 
$xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s index a63f5084..ed947cb5 100644 --- 
a/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # 
%.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - 
ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ 
-4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w 
$xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst 
$xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, 
$a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 
+9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - 
xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11466,181 +11358,129 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - 
xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: 
Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -11683,45 +11523,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - 
xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Recurrences-dbl/CMakeFiles/Recurrences-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Recurrences-dbl/CMakeFiles/Recurrences-dbl.dir/tsc.s index f34f40d7..da7e54a4 100644 --- a/results/MultiSource/Benchmarks/TSVC/Recurrences-dbl/CMakeFiles/Recurrences-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Recurrences-dbl/CMakeFiles/Recurrences-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, 
$a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, 
$a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 
.LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: 
Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 
96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 
+11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12283,245 +12175,161 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 
192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + 
xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12596,45 +12404,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + 
xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s index c3b4b762..4c3de597 100644 --- a/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, 
$a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # 
%.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi 
$xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 
+5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, 
$a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, 
$a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # 
=>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 
@@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - 
xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11447,181 +11339,129 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # 
%vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - 
xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -11664,45 +11504,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + 
xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s index c7b5ecfa..6442a931 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 
1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, 
%pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 
lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 
1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, 
-784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop 
Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # 
%.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: 
Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst 
$xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12544,7 +12436,7 @@ s315: # @s315 lu12i.w $a3, 67108 ori $a3, $a3, 3539 vreplgr2vr.w $vr2, $a3 - vreplgr2vr.w $vr3, $a2 + vldi $vr3, -3715 .p2align 4, , 16 .LBB12_1: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -14015,245 +13907,161 @@ set: # @set bnez $a2, .LBB23_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB23_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + 
add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB23_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB23_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst 
$xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB23_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB23_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB23_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB23_17: # %.preheader34.i47 +.LBB23_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -14328,45 +14136,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB23_17 + bnez $a4, .LBB23_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB23_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst 
$xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB23_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI23_0) - xvld $xr0, $a2, %pc_lo12(.LCPI23_0) - pcalau12i $a2, %pc_hi20(.LCPI23_1) - xvld $xr1, $a2, %pc_lo12(.LCPI23_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI23_0) + xvld $xr0, $a1, %pc_lo12(.LCPI23_0) + pcalau12i $a1, %pc_hi20(.LCPI23_1) + xvld $xr1, $a1, %pc_lo12(.LCPI23_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB23_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB23_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB23_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s index 6c7532e6..77633b4c 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - 
lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # 
%vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init 
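#
# [Editor's note; illustrative sketch, not part of the generated assembly]
# Every "- lu12i.w ... - xvreplgr2vr.w ... + xvldi" hunk in this file is the
# same peephole: a splat constant that was materialized through a scratch GPR
# is folded into one LASX load-immediate. The recurring shape, with register
# names taken from the hunks around this note:
#
#   before: two instructions and a live scratch register
#     lu12i.w        $a4, 260096       # $a4 = 260096 << 12 = 0x3F800000 (1.0f)
#     xvreplgr2vr.w  $xr0, $a4         # broadcast $a4 to all eight f32 lanes
#   after: one instruction, no scratch GPR
#     xvldi          $xr0, -1424       # broadcast 0x3F800000 (1.0f) directly
#
# Dropping the scratch GPR is also why neighbouring hunks renumber the
# remaining $aN registers downward: the rewritten loops need one GPR fewer.
#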
addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 
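#
# [Editor's note] Decoding the immediate just above, assuming the documented
# VLDI/XVLDI i13 format: -1424 is 0b1_1010_0111_0000 in 13-bit two's
# complement. Bit 12 selects the expanded modes; mode bits 11:8 = 0b1010
# build each 32-bit lane as {b7, ~b6, b6 x5, b[5:0], 19'b0} from payload
# b[7:0] = 0x70, giving 0x3F800000, i.e. IEEE-754 single-precision 1.0f --
# exactly the value the removed lu12i.w 260096 sequence produced.
#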
.LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 
+7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop 
Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init 
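#
# [Editor's note] The other xvldi immediates in this file decode the same
# way; each pairing below is read directly off the replaced sequence in its
# hunk:
#   xvldi -1424  replaces  lu12i.w 260096   # 0x3F800000 per f32 lane:  1.0f
#   xvldi -3265  replaces  lu12i.w 258048   # 0x3F000000 per f32 lane:  0.5f
#   xvldi -3264  replaces  lu12i.w 262144   # 0x40000000 per f32 lane:  2.0f
#   xvldi -1296  replaces  lu12i.w -264192  # 0xBF800000 per f32 lane: -1.0f
# The s315 hunk's xvldi -3715 uses integer mode 0b0001 (payload 0x7D shifted
# left by 8), splatting 0x7D00 = 32000 into each 32-bit lane, which matches
# TSVC's LEN; its source GPR is set up outside the visible hunk, so this
# decode is inferred rather than shown by the diff.
#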
addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + 
xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11705,7 +11597,7 @@ s315: # @s315 lu12i.w $a3, 67108 ori $a3, $a3, 3539 xvreplgr2vr.w $xr2, $a3 - xvreplgr2vr.w $xr3, $a2 + xvldi $xr3, -3715 .p2align 4, , 16 .LBB12_1: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -13172,181 +13064,129 @@ set: # @set bnez $a2, .LBB23_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB23_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - 
addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB23_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB23_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB23_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB23_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB23_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB23_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB23_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - 
movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB23_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB23_17: # %.preheader34.i47 +.LBB23_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13389,45 +13229,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB23_17 + bnez $a4, .LBB23_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB23_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB23_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI23_0) - xvld $xr0, $a2, %pc_lo12(.LCPI23_0) - pcalau12i $a2, %pc_hi20(.LCPI23_1) - xvld $xr1, $a2, %pc_lo12(.LCPI23_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI23_0) + xvld $xr0, $a1, %pc_lo12(.LCPI23_0) + pcalau12i $a1, %pc_hi20(.LCPI23_1) + xvld $xr1, $a1, %pc_lo12(.LCPI23_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB23_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, 
$a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB23_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB23_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Searching-dbl/CMakeFiles/Searching-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Searching-dbl/CMakeFiles/Searching-dbl.dir/tsc.s index 3145234d..6a57f327 100644 --- a/results/MultiSource/Benchmarks/TSVC/Searching-dbl/CMakeFiles/Searching-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Searching-dbl/CMakeFiles/Searching-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # 
%vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - 
xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # 
%vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: 
Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init 
ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 
+9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, 
-390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, 
$a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11790,38 +11682,35 @@ init: # @init .type s331,@function s331: # @s331 # %bb.0: - addi.d $sp, $sp, -208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill - fst.d $fs0, $sp, 112 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + fst.d $fs0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.88) addi.d $a0, $a0, %pc_lo12(.L.str.88) pcaddu18i $ra, %call36(init) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(ntimes) - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill ld.w $a0, $a0, %pc_lo12(ntimes) blez $a0, .LBB6_6 # %bb.1: # %vector.ph.preheader pcalau12i $a0, %pc_hi20(.LCPI6_0) vld $vr0, $a0, %pc_lo12(.LCPI6_0) - vst $vr0, $sp, 80 # 16-byte Folded Spill - lu12i.w $a0, -524288 - vreplgr2vr.w $vr0, $a0 vst $vr0, $sp, 64 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(global_data) - addi.d $fp, $a0, %pc_lo12(global_data) + addi.d $a1, $a0, %pc_lo12(global_data) move $s8, $zero - addi.d $a0, $fp, 32 + addi.d $a0, $a1, 32 st.d $a0, $sp, 56 # 8-byte Folded Spill lu12i.w $a0, 7 ori $a0, $a0, 3328 @@ -11831,35 +11720,36 @@ s331: # @s331 st.d $a0, $sp, 8 # 8-byte Folded Spill lu12i.w $a0, 62 ori $a0, $a0, 2080 - add.d $s0, $fp, $a0 + add.d $s0, $a1, $a0 lu12i.w $a0, 125 ori $a0, $a0, 64 - add.d $s1, $fp, $a0 + add.d $s1, $a1, $a0 lu12i.w $a0, 187 ori $a0, $a0, 2144 - add.d $s2, $fp, $a0 + add.d $s2, $a1, $a0 lu12i.w $a0, 250 ori $a0, $a0, 160 - add.d $s3, $fp, $a0 + add.d $s3, $a1, $a0 lu12i.w $a0, 312 ori $a0, $a0, 
2272 - add.d $s4, $fp, $a0 + add.d $s4, $a1, $a0 lu12i.w $a0, 440 ori $a0, $a0, 2368 - add.d $s5, $fp, $a0 + add.d $s5, $a1, $a0 lu12i.w $a0, 568 ori $a0, $a0, 2464 - add.d $s6, $fp, $a0 + move $s7, $a1 + add.d $s6, $a1, $a0 xvst $xr7, $sp, 16 # 32-byte Folded Spill .p2align 4, , 16 .LBB6_2: # %vector.ph # =>This Loop Header: Depth=1 # Child Loop BB6_3 Depth 2 + vldi $vr0, -3200 ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a1, $sp, 48 # 8-byte Folded Reload - vld $vr1, $sp, 64 # 16-byte Folded Reload - vori.b $vr0, $vr1, 0 - vld $vr2, $sp, 80 # 16-byte Folded Reload + vldi $vr1, -3200 + vld $vr2, $sp, 64 # 16-byte Folded Reload .p2align 4, , 16 .LBB6_3: # %vector.body # Parent Loop BB6_2 Depth=1 @@ -11905,10 +11795,10 @@ s331: # @s331 masknez $a0, $a0, $a1 ld.d $a2, $sp, 8 # 8-byte Folded Reload maskeqz $a1, $a2, $a1 - or $s7, $a1, $a0 - movgr2fr.w $fa0, $s7 + or $fp, $a1, $a0 + movgr2fr.w $fa0, $fp ffint.d.w $fa0, $fa0 - move $a0, $fp + move $a0, $s7 move $a1, $s0 move $a2, $s1 move $a3, $s2 @@ -11919,12 +11809,12 @@ s331: # @s331 pcaddu18i $ra, %call36(dummy) jirl $ra, $ra, 0 xvld $xr7, $sp, 16 # 32-byte Folded Reload - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(ntimes) addi.w $s8, $s8, 1 blt $s8, $a0, .LBB6_2 # %bb.5: # %._crit_edge.loopexit - addi.d $a0, $s7, 1 + addi.d $a0, $fp, 1 movgr2fr.w $fa0, $a0 ffint.d.w $fs0, $fa0 b .LBB6_7 @@ -11946,19 +11836,19 @@ s331: # @s331 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero - fld.d $fs0, $sp, 112 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s5, $sp, 144 # 8-byte Folded Reload - ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s3, $sp, 160 # 8-byte Folded Reload - ld.d $s2, $sp, 168 # 8-byte Folded Reload - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $s0, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $ra, $sp, 200 # 8-byte Folded Reload - addi.d $sp, $sp, 208 + fld.d $fs0, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 ret .Lfunc_end6: .size s331, .Lfunc_end6-s331 @@ -12236,245 +12126,161 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 
4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst 
$xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst 
$xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12549,45 +12355,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - 
xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s index 505ea68e..3340fd82 100644 --- a/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ 
-2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader 
pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, 
$a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 
addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - 
addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi 
$xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # 
%vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10955,42 +10847,41 @@ init: # @init .type s331,@function s331: # @s331 # %bb.0: - addi.d $sp, $sp, -240 - st.d $ra, $sp, 232 # 8-byte Folded Spill - st.d $fp, $sp, 224 # 8-byte Folded Spill - st.d $s0, $sp, 216 # 8-byte Folded Spill - st.d $s1, $sp, 208 # 8-byte 
Folded Spill - st.d $s2, $sp, 200 # 8-byte Folded Spill - st.d $s3, $sp, 192 # 8-byte Folded Spill - st.d $s4, $sp, 184 # 8-byte Folded Spill - st.d $s5, $sp, 176 # 8-byte Folded Spill - st.d $s6, $sp, 168 # 8-byte Folded Spill - st.d $s7, $sp, 160 # 8-byte Folded Spill - st.d $s8, $sp, 152 # 8-byte Folded Spill - fst.d $fs0, $sp, 144 # 8-byte Folded Spill + addi.d $sp, $sp, -256 + st.d $ra, $sp, 248 # 8-byte Folded Spill + st.d $fp, $sp, 240 # 8-byte Folded Spill + st.d $s0, $sp, 232 # 8-byte Folded Spill + st.d $s1, $sp, 224 # 8-byte Folded Spill + st.d $s2, $sp, 216 # 8-byte Folded Spill + st.d $s3, $sp, 208 # 8-byte Folded Spill + st.d $s4, $sp, 200 # 8-byte Folded Spill + st.d $s5, $sp, 192 # 8-byte Folded Spill + st.d $s6, $sp, 184 # 8-byte Folded Spill + st.d $s7, $sp, 176 # 8-byte Folded Spill + st.d $s8, $sp, 168 # 8-byte Folded Spill + fst.d $fs0, $sp, 160 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.88) addi.d $a0, $a0, %pc_lo12(.L.str.88) pcaddu18i $ra, %call36(init) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(ntimes) - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill ld.w $a0, $a0, %pc_lo12(ntimes) blez $a0, .LBB6_6 # %bb.1: # %vector.ph.preheader pcalau12i $a0, %pc_hi20(.LCPI6_0) xvld $xr0, $a0, %pc_lo12(.LCPI6_0) - xvst $xr0, $sp, 96 # 32-byte Folded Spill - lu12i.w $a0, -524288 - xvreplgr2vr.w $xr0, $a0 - xvst $xr0, $sp, 64 # 32-byte Folded Spill + xvst $xr0, $sp, 112 # 32-byte Folded Spill pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) move $s8, $zero addi.d $a0, $a1, 32 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill + xvldi $xr0, -3200 + xvst $xr0, $sp, 64 # 32-byte Folded Spill lu12i.w $a0, 7 ori $a0, $a0, 3328 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill xvrepli.b $xr6, 0 addi.w $a0, $zero, -1 st.d $a0, $sp, 8 # 8-byte Folded Spill @@ -11014,18 +10905,18 @@ s331: # @s331 add.d $s5, $a1, $a0 lu12i.w $a0, 284 ori $a0, $a0, 1376 - move $fp, $a1 + move $s7, $a1 add.d $s6, $a1, $a0 xvst $xr6, $sp, 16 # 32-byte Folded Spill .p2align 4, , 16 .LBB6_2: # %vector.ph # =>This Loop Header: Depth=1 # Child Loop BB6_3 Depth 2 - ld.d $a0, $sp, 56 # 8-byte Folded Reload - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload xvld $xr1, $sp, 64 # 32-byte Folded Reload xvori.b $xr0, $xr1, 0 - xvld $xr2, $sp, 96 # 32-byte Folded Reload + xvld $xr2, $sp, 112 # 32-byte Folded Reload .p2align 4, , 16 .LBB6_3: # %vector.body # Parent Loop BB6_2 Depth=1 @@ -11057,10 +10948,10 @@ s331: # @s331 masknez $a0, $a0, $a1 ld.d $a2, $sp, 8 # 8-byte Folded Reload maskeqz $a1, $a2, $a1 - or $s7, $a1, $a0 - movgr2fr.w $fa0, $s7 + or $fp, $a1, $a0 + movgr2fr.w $fa0, $fp ffint.s.w $fa0, $fa0 - move $a0, $fp + move $a0, $s7 move $a1, $s0 move $a2, $s1 move $a3, $s2 @@ -11071,12 +10962,12 @@ s331: # @s331 pcaddu18i $ra, %call36(dummy) jirl $ra, $ra, 0 xvld $xr6, $sp, 16 # 32-byte Folded Reload - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(ntimes) addi.w $s8, $s8, 1 blt $s8, $a0, .LBB6_2 # %bb.5: # %._crit_edge.loopexit - addi.d $a0, $s7, 1 + addi.d $a0, $fp, 1 movgr2fr.w $fa0, $a0 ffint.s.w $fs0, $fa0 b .LBB6_7 @@ -11099,19 +10990,19 @@ s331: # @s331 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero - fld.d $fs0, $sp, 144 # 8-byte Folded Reload - ld.d $s8, $sp, 152 # 8-byte Folded Reload - ld.d $s7, $sp, 160 # 
8-byte Folded Reload - ld.d $s6, $sp, 168 # 8-byte Folded Reload - ld.d $s5, $sp, 176 # 8-byte Folded Reload - ld.d $s4, $sp, 184 # 8-byte Folded Reload - ld.d $s3, $sp, 192 # 8-byte Folded Reload - ld.d $s2, $sp, 200 # 8-byte Folded Reload - ld.d $s1, $sp, 208 # 8-byte Folded Reload - ld.d $s0, $sp, 216 # 8-byte Folded Reload - ld.d $fp, $sp, 224 # 8-byte Folded Reload - ld.d $ra, $sp, 232 # 8-byte Folded Reload - addi.d $sp, $sp, 240 + fld.d $fs0, $sp, 160 # 8-byte Folded Reload + ld.d $s8, $sp, 168 # 8-byte Folded Reload + ld.d $s7, $sp, 176 # 8-byte Folded Reload + ld.d $s6, $sp, 184 # 8-byte Folded Reload + ld.d $s5, $sp, 192 # 8-byte Folded Reload + ld.d $s4, $sp, 200 # 8-byte Folded Reload + ld.d $s3, $sp, 208 # 8-byte Folded Reload + ld.d $s2, $sp, 216 # 8-byte Folded Reload + ld.d $s1, $sp, 224 # 8-byte Folded Reload + ld.d $s0, $sp, 232 # 8-byte Folded Reload + ld.d $fp, $sp, 240 # 8-byte Folded Reload + ld.d $ra, $sp, 248 # 8-byte Folded Reload + addi.d $sp, $sp, 256 ret .Lfunc_end6: .size s331, .Lfunc_end6-s331 @@ -11401,181 +11292,129 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 
64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + 
ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -11618,45 +11457,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git 
a/results/MultiSource/Benchmarks/TSVC/StatementReordering-dbl/CMakeFiles/StatementReordering-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/StatementReordering-dbl/CMakeFiles/StatementReordering-dbl.dir/tsc.s index e9a1d8ee..7f86810c 100644 --- a/results/MultiSource/Benchmarks/TSVC/StatementReordering-dbl/CMakeFiles/StatementReordering-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/StatementReordering-dbl/CMakeFiles/StatementReordering-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 
- lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 
1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez $a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, 
$a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - 
lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 
+ xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 
2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 
.LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # 
=>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst 
$xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12301,245 +12193,161 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst 
$xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - 
xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12614,45 +12422,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst 
$xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s index f417c2db..5d33ff00 100644 --- a/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # 
@init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ -2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - 
xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - 
lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner 
Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori 
$a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # 
%.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # %vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b 
.LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # 
%vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # %vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, 
$a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11465,181 +11357,129 @@ set: # @set bnez $a2, .LBB10_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB10_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB10_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB10_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, 
-16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB10_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB10_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB10_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB10_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB10_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 
.p2align 4, , 16 -.LBB10_17: # %.preheader34.i47 +.LBB10_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -11682,45 +11522,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d $a2, $a2, 1024 - bnez $a4, .LBB10_17 + bnez $a4, .LBB10_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB10_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB10_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI10_0) - xvld $xr0, $a2, %pc_lo12(.LCPI10_0) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - xvld $xr1, $a2, %pc_lo12(.LCPI10_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI10_0) + xvld $xr0, $a1, %pc_lo12(.LCPI10_0) + pcalau12i $a1, %pc_hi20(.LCPI10_1) + xvld $xr1, $a1, %pc_lo12(.LCPI10_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB10_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB10_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB10_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Symbolics-dbl/CMakeFiles/Symbolics-dbl.dir/tsc.s 
b/results/MultiSource/Benchmarks/TSVC/Symbolics-dbl/CMakeFiles/Symbolics-dbl.dir/tsc.s index bb0f784a..320d7a9a 100644 --- a/results/MultiSource/Benchmarks/TSVC/Symbolics-dbl/CMakeFiles/Symbolics-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Symbolics-dbl/CMakeFiles/Symbolics-dbl.dir/tsc.s @@ -2025,8 +2025,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_151: # %vector.body6187 # =>This Inner Loop Header: Depth=1 @@ -2135,8 +2134,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_161: # %vector.body6172 # =>This Inner Loop Header: Depth=1 @@ -2176,8 +2174,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_165: # %vector.body6157 # =>This Inner Loop Header: Depth=1 @@ -2385,8 +2382,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_173: # %vector.body6117 # =>This Inner Loop Header: Depth=1 @@ -2634,8 +2630,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_181: # %vector.body6111 # =>This Inner Loop Header: Depth=1 @@ -2651,8 +2646,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_183: # %vector.body6099 # =>This Inner Loop Header: Depth=1 @@ -2750,8 +2744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2912,8 +2905,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_191: # %vector.body6070 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2951,8 +2943,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_195: # %vector.body6055 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2997,8 +2988,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_199: # %vector.body6025 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3079,8 +3069,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_207: # %vector.body5995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3162,8 +3151,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3239,8 +3227,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1022 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -928 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ 
-3316,8 +3303,7 @@ init: # @init ori $a1, $a1, 3488 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -3396,8 +3382,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3588,8 +3573,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_227: # %vector.body5921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3681,8 +3665,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -1024 .LBB5_235: # %vector.body5903 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3695,8 +3678,7 @@ init: # @init ori $a1, $a1, 96 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_237: # %vector.body5909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3723,8 +3705,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_241: # %vector.body5888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3763,8 +3744,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -3889,8 +3869,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_251: # %vector.body5850 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3985,8 +3964,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_255: # %vector.body5835 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4024,8 +4002,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_259: # %vector.body5802 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -4114,35 +4091,35 @@ init: # @init .LBB5_266: # %vector.body5759.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_267: # %vector.body5759 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5765.preheader - lu12i.w $a3, 62 - ori $a4, $a3, 2096 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 62 + ori $a3, $a2, 2096 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu52i.d $a6, $zero, 1023 .LBB5_269: # %vector.body5765 # =>This Inner Loop Header: Depth=1 - st.d $a2, $a5, -16 - st.d $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 32 - bnez 
$a6, .LBB5_269 + st.d $a6, $a4, -16 + st.d $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 32 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5771.preheader - ori $a2, $a3, 2104 + ori $a2, $a2, 2104 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu52i.d $a4, $zero, -1025 .LBB5_271: # %vector.body5771 # =>This Inner Loop Header: Depth=1 @@ -4217,8 +4194,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_279: # %vector.body5735 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4280,8 +4256,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_285: # %vector.body5720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4319,8 +4294,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_289: # %vector.body5705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4358,8 +4332,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_293: # %vector.body5690 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4397,8 +4370,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_297: # %vector.body5675 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4436,8 +4408,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_301: # %vector.body5660 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4475,8 +4446,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_305: # %vector.body5636 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4545,8 +4515,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_311: # %vector.body5603 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4655,8 +4624,7 @@ init: # @init ori $a3, $a3, 2112 add.d $a3, $a1, $a3 ori $a4, $a2, 3328 - lu52i.d $a5, $zero, 1023 - xvreplgr2vr.d $xr0, $a5 + xvldi $xr0, -912 .LBB5_321: # %vector.body5582 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a3, -32 @@ -4706,8 +4674,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_327: # %vector.body5540 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4799,8 +4766,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_335: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4828,8 +4794,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -4989,8 +4954,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_343: # %.preheader.i1556 # =>This 
Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5328,8 +5292,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -5571,8 +5534,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_360: # %vector.body5424 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5790,8 +5752,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_370: # %vector.body5400 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5904,8 +5865,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_386: # %vector.body5343 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5997,8 +5957,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_394: # %vector.body5325 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6049,8 +6008,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_400: # %vector.body5292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6142,8 +6100,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_408: # %vector.body5280 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6170,8 +6127,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_412: # %vector.body5253 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6246,8 +6202,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_420: # %vector.body5247 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6269,8 +6224,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_422: # %vector.body5241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6285,8 +6239,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_424: # %vector.body5223 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6299,8 +6252,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ -6453,8 +6405,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_430: # %vector.body5205 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6467,8 +6418,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1024 - xvreplgr2vr.d $xr1, $a3 + xvldi $xr1, -1024 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -1024 @@ 
-6790,8 +6740,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_444: # %vector.body5140 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6863,8 +6812,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_452: # %vector.body5116 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6926,8 +6874,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_458: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6999,8 +6946,7 @@ init: # @init ori $a2, $a2, 192 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1024 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -1024 .LBB5_466: # %vector.body5110 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7015,8 +6961,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_468: # %vector.body5047 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7103,8 +7048,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_478: # %vector.body5017 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7179,8 +7123,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -7413,8 +7356,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_492: # %vector.body4966 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7499,8 +7441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_500: # %vector.body4921 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7525,8 +7466,7 @@ init: # @init ori $a2, $a2, 3136 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_504: # %vector.body4933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7611,8 +7551,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_512: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7625,8 +7564,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_514: # %vector.body4882 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7723,8 +7661,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, -1025 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -784 .LBB5_524: # %vector.body4831 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7737,8 +7674,7 @@ init: # @init ori $a2, $a2, 1056 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_526: # %vector.body4837 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7835,8 +7771,7 @@ init: # @init addi.d $a2, $a0, 32 
lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_536: # %vector.body4792 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7933,8 +7868,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_546: # %vector.body4768 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7996,8 +7930,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_552: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8066,8 +7999,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_558: # %vector.body4732 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8101,8 +8033,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_562: # %vector.body4726 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8124,8 +8055,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_564: # %vector.body4720 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8166,8 +8096,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -8870,8 +8799,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_608: # %vector.body4552 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8891,8 +8819,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_611: # %vector.body4546 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -8923,8 +8850,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_614: # %vector.body4507 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9245,8 +9171,7 @@ init: # @init ori $a1, $a1, 3392 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_639: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -1024 @@ -9321,30 +9246,29 @@ init: # @init .LBB5_640: # %vector.body4436.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_641: # %vector.body4436 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_641 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_641 # %bb.642: # %vector.body4442.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_643: # %vector.body4442 # 
=>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_643 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_643 b .LBB5_652 .LBB5_644: # %vector.body4418.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) @@ -9398,34 +9322,34 @@ init: # @init .LBB5_648: # %vector.body4406.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -912 .LBB5_649: # %vector.body4406 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_649 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_649 # %bb.650: # %vector.body4412.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2112 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2112 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_651: # %vector.body4412 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -8 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_651 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -8 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_651 .LBB5_652: # %set1d.exit2374 - lu12i.w $a2, 125 - ori $a2, $a2, 64 - stx.d $a1, $a0, $a2 + lu12i.w $a1, 125 + ori $a1, $a1, 64 + lu52i.d $a2, $zero, 1023 + stx.d $a2, $a0, $a1 b .LBB5_573 .LBB5_653: # %vector.body4382.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9433,8 +9357,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_654: # %vector.body4382 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9496,8 +9419,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_660: # %vector.body4358 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9566,8 +9488,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_666: # %vector.body4328 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9642,8 +9563,7 @@ init: # @init ori $a1, $a1, 3296 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_674: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -1024 @@ -9885,8 +9805,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_680: # %vector.body4282 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9975,8 +9894,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 .LBB5_689: # %vector.body4276 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9990,8 +9908,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_691: # %vector.body4252 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10058,8 +9975,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) 
lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_697: # %vector.body4228 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10096,8 +10012,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_701: # %vector.body4213 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10135,8 +10050,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_705: # %vector.body4198 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10174,8 +10088,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -912 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_709: # %vector.body4162 # =>This Inner Loop Header: Depth=1 @@ -10237,8 +10150,7 @@ init: # @init ori $a0, $a0, 2176 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu52i.d $a3, $zero, -1025 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -784 .LBB5_715: # %vector.body4186 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10272,8 +10184,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_719: # %vector.body4120 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10381,8 +10292,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_729: # %vector.body4096 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10499,23 +10409,22 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 2112 - add.d $a1, $fp, $a0 - lu12i.w $a0, 7 - ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + add.d $a0, $fp, $a0 + lu12i.w $a1, 7 + ori $a2, $a1, 3328 + xvldi $xr0, -912 .LBB5_739: # %vector.body4066 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 addi.d $a2, $a2, -8 - addi.d $a1, $a1, 64 + addi.d $a0, $a0, 64 bnez $a2, .LBB5_739 # %bb.740: # %vector.body4072.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 96 - add.d $a1, $fp, $a1 - ori $a0, $a0, 3328 + lu12i.w $a0, 125 + ori $a0, $a0, 96 + add.d $a0, $fp, $a0 + ori $a1, $a1, 3328 lu12i.w $a2, -390306 ori $a2, $a2, 3469 lu32i.d $a2, 50935 @@ -10523,11 +10432,11 @@ init: # @init xvreplgr2vr.d $xr0, $a2 .LBB5_741: # %vector.body4072 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 - addi.d $a0, $a0, -8 - addi.d $a1, $a1, 64 - bnez $a0, .LBB5_741 + xvst $xr0, $a0, -32 + xvst $xr0, $a0, 0 + addi.d $a1, $a1, -8 + addi.d $a0, $a0, 64 + bnez $a1, .LBB5_741 b .LBB5_573 .LBB5_742: # %.preheader.i2620.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -10567,8 +10476,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_745: # %vector.body4021 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10654,8 +10562,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_755: # %vector.body3988 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10740,8 +10647,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d 
$a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_763: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10810,8 +10716,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_769: # %vector.body3940 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10873,8 +10778,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_775: # %vector.body3925 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10920,8 +10824,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_779: # %vector.body3910 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10966,8 +10869,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_783: # %vector.body3886 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11193,8 +11095,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .LBB5_797: # %vector.body3827 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -11256,8 +11157,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_803: # %vector.body3803 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11479,8 +11379,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_819: # %vector.body3746 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11507,8 +11406,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_823: # %vector.body3722 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11570,8 +11468,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_829: # %vector.body3707 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11632,8 +11529,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_835: # %vector.body3695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11646,8 +11542,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, -1025 - xvreplgr2vr.d $xr0, $a2 + xvldi $xr0, -784 .LBB5_837: # %vector.body3701 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -11662,8 +11557,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -912 .LBB5_839: # %vector.body3668 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11676,8 +11570,7 @@ init: # @init ori $a2, $a2, 2112 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu52i.d $a4, $zero, 1024 - xvreplgr2vr.d $xr0, $a4 + xvldi $xr0, -1024 .LBB5_841: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -11690,8 +11583,7 @@ init: # @init ori $a2, $a2, 96 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, 
$a2 + xvldi $xr0, -928 .LBB5_843: # %vector.body3680 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12900,245 +12792,161 @@ set: # @set bnez $a2, .LBB13_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -912 .p2align 4, , 16 .LBB13_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 2112 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 2112 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 96 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 96 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 187 - ori $a3, $a3, 2176 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 187 + ori $a2, $a2, 2176 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 250 - ori $a3, $a3, 192 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 250 + ori $a2, $a2, 192 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -8 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -8 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 312 - ori $a3, $a3, 3296 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 312 + ori $a2, $a2, 3296 + add.d $a2, $a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB13_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - 
xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB13_13 + xvst $xr0, $a2, -1024 + xvst $xr0, $a2, -992 + xvst $xr0, $a2, -960 + xvst $xr0, $a2, -928 + xvst $xr0, $a2, -896 + xvst $xr0, $a2, -864 + xvst $xr0, $a2, -832 + xvst $xr0, $a2, -800 + xvst $xr0, $a2, -768 + xvst $xr0, $a2, -736 + xvst $xr0, $a2, -704 + xvst $xr0, $a2, -672 + xvst $xr0, $a2, -640 + xvst $xr0, $a2, -608 + xvst $xr0, $a2, -576 + xvst $xr0, $a2, -544 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + xvst $xr0, $a2, 512 + xvst $xr0, $a2, 544 + xvst $xr0, $a2, 576 + xvst $xr0, $a2, 608 + xvst $xr0, $a2, 640 + xvst $xr0, $a2, 672 + xvst $xr0, $a2, 704 + xvst $xr0, $a2, 736 + xvst $xr0, $a2, 768 + xvst $xr0, $a2, 800 + xvst $xr0, $a2, 832 + xvst $xr0, $a2, 864 + xvst $xr0, $a2, 896 + xvst $xr0, $a2, 928 + xvst $xr0, $a2, 960 + xvst $xr0, $a2, 992 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 2047 + addi.d $a2, $a2, 1 + bnez $a4, .LBB13_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 440 - ori $a3, $a3, 3392 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB13_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - xvreplve0.d $xr0, $xr0 - xvst $xr0, $a3, -1024 - xvst $xr0, $a3, -992 - xvst $xr0, $a3, -960 - xvst $xr0, $a3, -928 - xvst $xr0, $a3, -896 - xvst $xr0, $a3, -864 - xvst $xr0, $a3, -832 - xvst $xr0, $a3, -800 - xvst $xr0, $a3, -768 - xvst $xr0, $a3, -736 - xvst $xr0, $a3, -704 - xvst $xr0, $a3, -672 - xvst $xr0, $a3, -640 - xvst $xr0, $a3, -608 - xvst $xr0, $a3, -576 - xvst $xr0, $a3, -544 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 
- xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - xvst $xr0, $a3, 512 - xvst $xr0, $a3, 544 - xvst $xr0, $a3, 576 - xvst $xr0, $a3, 608 - xvst $xr0, $a3, 640 - xvst $xr0, $a3, 672 - xvst $xr0, $a3, 704 - xvst $xr0, $a3, 736 - xvst $xr0, $a3, 768 - xvst $xr0, $a3, 800 - xvst $xr0, $a3, 832 - xvst $xr0, $a3, 864 - xvst $xr0, $a3, 896 - xvst $xr0, $a3, 928 - xvst $xr0, $a3, 960 - xvst $xr0, $a3, 992 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 2047 - addi.d $a3, $a3, 1 - bnez $a5, .LBB13_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 568 - ori $a3, $a3, 3488 - add.d $a2, $a2, $a3 + lu12i.w $a2, 440 + ori $a2, $a2, 3392 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB13_17: # %.preheader34.i47 +.LBB13_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -13213,45 +13021,129 @@ set: # @set addi.d $a4, $a4, -1 addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 - bnez $a4, .LBB13_17 + bnez $a4, .LBB13_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 568 + ori $a2, $a2, 3488 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB13_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + xvreplve0.d $xr0, $xr0 + xvst $xr0, $a1, -1024 + xvst $xr0, $a1, -992 + xvst $xr0, $a1, -960 + xvst $xr0, $a1, -928 + xvst $xr0, $a1, -896 + xvst $xr0, $a1, -864 + xvst $xr0, $a1, -832 + xvst $xr0, $a1, -800 + xvst $xr0, $a1, -768 + xvst $xr0, $a1, -736 + xvst $xr0, $a1, -704 + xvst $xr0, $a1, -672 + xvst $xr0, $a1, -640 + xvst $xr0, $a1, -608 + xvst $xr0, $a1, -576 + xvst $xr0, $a1, -544 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + xvst $xr0, $a1, 512 + xvst $xr0, $a1, 544 + xvst $xr0, $a1, 576 + xvst $xr0, $a1, 608 + xvst $xr0, $a1, 640 + xvst $xr0, $a1, 672 + xvst $xr0, $a1, 704 + xvst $xr0, $a1, 736 + xvst $xr0, $a1, 768 + xvst $xr0, $a1, 800 + xvst $xr0, $a1, 832 + xvst $xr0, $a1, 864 + xvst $xr0, $a1, 896 + xvst $xr0, $a1, 928 + xvst $xr0, $a1, 960 + xvst $xr0, $a1, 992 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 2047 + addi.d $a1, $a1, 1 + bnez $a3, .LBB13_17 # %bb.18: # 
%vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI13_0) - xvld $xr0, $a2, %pc_lo12(.LCPI13_0) - pcalau12i $a2, %pc_hi20(.LCPI13_1) - xvld $xr1, $a2, %pc_lo12(.LCPI13_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI13_0) + xvld $xr0, $a1, %pc_lo12(.LCPI13_0) + pcalau12i $a1, %pc_hi20(.LCPI13_1) + xvld $xr1, $a1, %pc_lo12(.LCPI13_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB13_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB13_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB13_19 # %bb.20: # %middle.block122 + lu52i.d $a0, $zero, 1023 st.d $a0, $s0, 0 lu52i.d $a0, $zero, 1024 st.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s index 74cf968e..93c0dd87 100644 --- a/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s @@ -1900,8 +1900,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_151: # %vector.body5756 # =>This Inner Loop Header: Depth=1 @@ -2071,8 +2070,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_161: # %vector.body5742 # =>This Inner Loop Header: Depth=1 @@ -2128,8 +2126,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_165: # %vector.body5728 # =>This Inner Loop Header: Depth=1 @@ -2289,8 +2286,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_173: # %vector.body5688 # =>This Inner Loop Header: Depth=1 @@ -2437,8 +2433,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_181: # %vector.body5682 # =>This Inner Loop Header: Depth=1 @@ 
-2454,8 +2449,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_183: # %vector.body5670 # =>This Inner Loop Header: Depth=1 @@ -2518,8 +2512,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB5_187: # %.preheader.i1144 # =>This Inner Loop Header: Depth=1 @@ -2615,8 +2608,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_191: # %vector.body5642 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2670,8 +2662,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_195: # %vector.body5628 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -2732,8 +2723,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_199: # %vector.body5604 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2817,8 +2807,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_207: # %vector.body5580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -2903,8 +2892,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_215: # %.preheader.i1220 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2947,8 +2935,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 258048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3265 .LBB5_217: # %.preheader.i1227 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -2991,8 +2978,7 @@ init: # @init ori $a1, $a1, 1888 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_219: # %.preheader.i1234 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -3038,8 +3024,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_221: # %.preheader.i1241 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3166,8 +3151,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_227: # %vector.body5518 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3264,8 +3248,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3264 .LBB5_235: # %vector.body5500 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3278,8 +3261,7 @@ init: # @init ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_237: # %vector.body5506 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -3306,8 +3288,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_241: # %vector.body5486 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3362,8 +3343,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 
260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_245: # %.preheader.i1309 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -3458,8 +3438,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_251: # %vector.body5454 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3522,8 +3501,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_255: # %vector.body5440 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3577,8 +3555,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_259: # %vector.body5416 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -3672,35 +3649,35 @@ init: # @init .LBB5_266: # %vector.body5379.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 + addi.d $a2, $a0, 32 lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_267: # %vector.body5379 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_267 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_267 # %bb.268: # %vector.body5385.preheader - lu12i.w $a3, 31 - ori $a4, $a3, 1048 - add.d $a5, $a0, $a4 - lu12i.w $a4, 3 - ori $a6, $a4, 3712 + lu12i.w $a2, 31 + ori $a3, $a2, 1048 + add.d $a4, $a0, $a3 + lu12i.w $a3, 3 + ori $a5, $a3, 3712 + lu12i.w $a6, 260096 .LBB5_269: # %vector.body5385 # =>This Inner Loop Header: Depth=1 - st.w $a2, $a5, -8 - st.w $a2, $a5, 0 - addi.d $a6, $a6, -2 - addi.d $a5, $a5, 16 - bnez $a6, .LBB5_269 + st.w $a6, $a4, -8 + st.w $a6, $a4, 0 + addi.d $a5, $a5, -2 + addi.d $a4, $a4, 16 + bnez $a5, .LBB5_269 # %bb.270: # %vector.body5391.preheader - ori $a2, $a3, 1052 + ori $a2, $a2, 1052 add.d $a2, $a0, $a2 - ori $a3, $a4, 3712 + ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 .LBB5_271: # %vector.body5391 @@ -3779,8 +3756,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_279: # %vector.body5361 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3845,8 +3821,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_285: # %vector.body5347 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3900,8 +3875,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_289: # %vector.body5333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -3955,8 +3929,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_293: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4010,8 +3983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_297: # %vector.body5305 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4065,8 +4037,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - 
lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_301: # %vector.body5291 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4120,8 +4091,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_305: # %vector.body5273 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4193,8 +4163,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_311: # %vector.body5249 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4309,8 +4278,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_321: # %vector.body5231 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4362,8 +4330,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_327: # %vector.body5201 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -4460,8 +4427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_335: # %vector.body5189 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -4489,8 +4455,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_339: # %.preheader.i1541 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4585,8 +4550,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_343: # %.preheader.i1556 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4795,8 +4759,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_354: # %.preheader.i1594 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -4941,8 +4904,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_360: # %vector.body5091 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5098,8 +5060,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_370: # %vector.body5067 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5210,8 +5171,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_386: # %vector.body5019 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5308,8 +5268,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_394: # %vector.body5001 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5358,8 +5317,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_400: # %vector.body4977 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5456,8 +5414,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_408: # %vector.body4965 # =>This Inner 
Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5484,8 +5441,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_412: # %vector.body4941 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -5559,8 +5515,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_420: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5582,8 +5537,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_422: # %vector.body4929 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -5598,8 +5552,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_424: # %vector.body4911 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5612,8 +5565,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_426: # %.preheader.i1789 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5700,8 +5652,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_430: # %vector.body4893 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -5714,8 +5665,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 262144 - xvreplgr2vr.w $xr1, $a3 + xvldi $xr1, -3264 .LBB5_432: # %.preheader.i1807 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a1, -512 @@ -5944,8 +5894,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_444: # %vector.body4839 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6048,8 +5997,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_452: # %vector.body4821 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6114,8 +6062,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_458: # %vector.body4791 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6190,8 +6137,7 @@ init: # @init ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 262144 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3264 .LBB5_466: # %vector.body4815 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -6206,8 +6152,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_468: # %vector.body4761 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6293,8 +6238,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_478: # %vector.body4737 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -6372,8 +6316,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_486: # %.preheader.i1949 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -6505,8 +6448,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 
ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_492: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6596,8 +6538,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_500: # %vector.body4659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6622,8 +6563,7 @@ init: # @init ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_504: # %vector.body4671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6713,8 +6653,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_512: # %vector.body4623 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6726,8 +6665,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_514: # %vector.body4629 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6829,8 +6767,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, -264192 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1296 .LBB5_524: # %vector.body4587 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6842,8 +6779,7 @@ init: # @init addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_526: # %vector.body4593 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -6945,8 +6881,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_536: # %vector.body4557 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7048,8 +6983,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_546: # %vector.body4539 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7114,8 +7048,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_552: # %vector.body4521 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -7187,8 +7120,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_558: # %vector.body4509 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7222,8 +7154,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_562: # %vector.body4503 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7245,8 +7176,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_564: # %vector.body4497 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7288,8 +7218,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_568: # %.preheader.i2154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -7892,8 +7821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_609: # 
%vector.body4369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -7913,8 +7841,7 @@ init: # @init addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_612: # %vector.body4363 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -7945,8 +7872,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_615: # %vector.body4333 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8262,8 +8188,7 @@ init: # @init ori $a1, $a1, 1808 add.d $a0, $a0, $a1 ori $a1, $zero, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_640: # %.preheader.i2360 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -512 @@ -8305,30 +8230,29 @@ init: # @init .LBB5_641: # %vector.body4275.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_642: # %vector.body4275 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_642 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_642 # %bb.643: # %vector.body4281.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_644: # %vector.body4281 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_644 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_644 b .LBB5_653 .LBB5_645: # %vector.body4263.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8385,34 +8309,34 @@ init: # @init .LBB5_649: # %vector.body4251.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a3, $a0, 32 - lu12i.w $a2, 7 - ori $a4, $a2, 3328 - lu12i.w $a1, 260096 - xvreplgr2vr.w $xr0, $a1 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 + xvldi $xr0, -1424 .LBB5_650: # %vector.body4251 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB5_650 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB5_650 # %bb.651: # %vector.body4257.preheader - lu12i.w $a3, 31 - ori $a3, $a3, 1072 - add.d $a3, $a0, $a3 - ori $a2, $a2, 3328 + lu12i.w $a2, 31 + ori $a2, $a2, 1072 + add.d $a2, $a0, $a2 + ori $a1, $a1, 3328 .LBB5_652: # %vector.body4257 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a2, $a2, -16 - addi.d $a3, $a3, 64 - bnez $a2, .LBB5_652 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a1, $a1, -16 + addi.d $a2, $a2, 64 + bnez $a1, .LBB5_652 .LBB5_653: # %set1d.exit2374 - lu12i.w $a2, 62 - ori $a2, $a2, 2096 - stx.w $a1, $a0, $a2 + lu12i.w $a1, 62 + ori $a1, $a1, 2096 + lu12i.w $a2, 260096 + stx.w $a2, $a0, $a1 b .LBB5_573 .LBB5_654: # %vector.body4233.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -8420,8 +8344,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 
.LBB5_655: # %vector.body4233 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8486,8 +8409,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_661: # %vector.body4215 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -8559,8 +8481,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_667: # %vector.body4191 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8638,8 +8559,7 @@ init: # @init ori $a1, $a1, 1728 add.d $a1, $a0, $a1 ori $a2, $zero, 256 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_675: # %.preheader.i2443 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -512 @@ -8784,8 +8704,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_681: # %vector.body4151 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8893,8 +8812,7 @@ init: # @init pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 .LBB5_691: # %vector.body4145 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -8908,8 +8826,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_693: # %vector.body4123 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9008,8 +8925,7 @@ init: # @init addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_699: # %vector.body4101 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9062,8 +8978,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_703: # %vector.body4087 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9117,8 +9032,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_707: # %vector.body4073 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9172,8 +9086,7 @@ init: # @init addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill .LBB5_711: # %vector.body4043 # =>This Inner Loop Header: Depth=1 @@ -9238,8 +9151,7 @@ init: # @init ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 - lu12i.w $a3, -264192 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1296 .LBB5_717: # %vector.body4061 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -9274,8 +9186,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_721: # %vector.body4013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9390,8 +9301,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_731: # %vector.body3995 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9517,8 +9427,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_741: # 
%vector.body3971 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9596,8 +9505,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_747: # %vector.body3933 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9686,8 +9594,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_757: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9777,8 +9684,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_765: # %vector.body3891 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -9850,8 +9756,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_771: # %vector.body3873 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9916,8 +9821,7 @@ init: # @init addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_777: # %vector.body3861 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -9964,8 +9868,7 @@ init: # @init add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a2, $a1, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_781: # %vector.body3847 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -10026,8 +9929,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_785: # %vector.body3829 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10228,8 +10130,7 @@ init: # @init add.d $a1, $fp, $a0 lu12i.w $a0, 7 ori $a2, $a0, 3328 - lu12i.w $a3, 260096 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -1424 .LBB5_799: # %vector.body3785 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10294,8 +10195,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_805: # %vector.body3767 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10600,8 +10500,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_821: # %vector.body3715 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10628,8 +10527,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_825: # %vector.body3697 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10694,8 +10592,7 @@ init: # @init addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_831: # %vector.body3683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10789,8 +10686,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a1, $a2 ori $a3, $a0, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_837: # %vector.body3671 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10803,8 +10699,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 - lu12i.w $a2, -264192 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1296 .LBB5_839: # %vector.body3677 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 @@ -10819,8 +10714,7 @@ init: # @init 
addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 - lu12i.w $a4, 260096 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -1424 .LBB5_841: # %vector.body3647 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10833,8 +10727,7 @@ init: # @init ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 - lu12i.w $a4, 262144 - xvreplgr2vr.w $xr0, $a4 + xvldi $xr0, -3264 .LBB5_843: # %vector.body3653 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 @@ -10847,8 +10740,7 @@ init: # @init ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 - lu12i.w $a2, 258048 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -3265 .LBB5_845: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 @@ -12108,181 +12000,129 @@ set: # @set bnez $a2, .LBB13_1 # %bb.2: # %vector.body67.preheader pcalau12i $a0, %pc_hi20(global_data) - addi.d $a2, $a0, %pc_lo12(global_data) - addi.d $a3, $a2, 32 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 - lu12i.w $a0, 260096 - xvreplgr2vr.w $xr0, $a0 + addi.d $a1, $a0, %pc_lo12(global_data) + addi.d $a2, $a1, 32 + lu12i.w $a0, 7 + ori $a3, $a0, 3328 + xvldi $xr0, -1424 .p2align 4, , 16 .LBB13_3: # %vector.body67 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_3 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_3 # %bb.4: # %vector.body73.preheader - ori $a3, $s2, 1072 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + ori $a2, $s2, 1072 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_5: # %vector.body73 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_5 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_5 # %bb.6: # %vector.body79.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2128 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 62 + ori $a2, $a2, 2128 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_7: # %vector.body79 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_7 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_7 # %bb.8: # %vector.body85.preheader - lu12i.w $a3, 93 - ori $a3, $a3, 3184 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 93 + ori $a2, $a2, 3184 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_9: # %vector.body85 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_9 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_9 # %bb.10: # %vector.body91.preheader - lu12i.w $a3, 125 - ori $a3, $a3, 160 - add.d $a3, $a2, $a3 - ori $a4, $a1, 3328 + lu12i.w $a2, 125 + ori $a2, $a2, 160 + add.d $a2, $a1, $a2 + ori $a3, $a0, 3328 .p2align 4, , 16 .LBB13_11: # %vector.body91 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - addi.d $a4, $a4, -16 - addi.d $a3, $a3, 64 - bnez $a4, .LBB13_11 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + addi.d $a3, $a3, -16 + addi.d $a2, $a2, 64 + bnez $a3, .LBB13_11 # %bb.12: # %.preheader34.i.preheader - lu12i.w $a3, 156 - ori $a3, $a3, 1728 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 + lu12i.w $a2, 156 + ori $a2, $a2, 1728 + add.d $a2, 
$a1, $a2 + ori $a3, $zero, 1 + ori $a4, $zero, 256 .p2align 4, , 16 .LBB13_13: # %.preheader34.i # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 + bstrpick.d $a5, $a3, 31, 0 + movgr2fr.d $fa0, $a5 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 fcvt.s.d $fa0, $fa0 xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB13_13 + xvst $xr0, $a2, -512 + xvst $xr0, $a2, -480 + xvst $xr0, $a2, -448 + xvst $xr0, $a2, -416 + xvst $xr0, $a2, -384 + xvst $xr0, $a2, -352 + xvst $xr0, $a2, -320 + xvst $xr0, $a2, -288 + xvst $xr0, $a2, -256 + xvst $xr0, $a2, -224 + xvst $xr0, $a2, -192 + xvst $xr0, $a2, -160 + xvst $xr0, $a2, -128 + xvst $xr0, $a2, -96 + xvst $xr0, $a2, -64 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 + xvst $xr0, $a2, 32 + xvst $xr0, $a2, 64 + xvst $xr0, $a2, 96 + xvst $xr0, $a2, 128 + xvst $xr0, $a2, 160 + xvst $xr0, $a2, 192 + xvst $xr0, $a2, 224 + xvst $xr0, $a2, 256 + xvst $xr0, $a2, 288 + xvst $xr0, $a2, 320 + xvst $xr0, $a2, 352 + xvst $xr0, $a2, 384 + xvst $xr0, $a2, 416 + xvst $xr0, $a2, 448 + xvst $xr0, $a2, 480 + addi.w $a3, $a3, 1 + addi.d $a4, $a4, -1 + addi.d $a2, $a2, 1024 + bnez $a4, .LBB13_13 # %bb.14: # %.preheader34.i39.preheader - lu12i.w $a3, 220 - ori $a3, $a3, 1808 - add.d $a3, $a2, $a3 - ori $a4, $zero, 1 - ori $a5, $zero, 256 - .p2align 4, , 16 -.LBB13_15: # %.preheader34.i39 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - frecip.d $fa0, $fa0 - fcvt.s.d $fa0, $fa0 - xvreplve0.w $xr0, $xr0 - xvst $xr0, $a3, -512 - xvst $xr0, $a3, -480 - xvst $xr0, $a3, -448 - xvst $xr0, $a3, -416 - xvst $xr0, $a3, -384 - xvst $xr0, $a3, -352 - xvst $xr0, $a3, -320 - xvst $xr0, $a3, -288 - xvst $xr0, $a3, -256 - xvst $xr0, $a3, -224 - xvst $xr0, $a3, -192 - xvst $xr0, $a3, -160 - xvst $xr0, $a3, -128 - xvst $xr0, $a3, -96 - xvst $xr0, $a3, -64 - xvst $xr0, $a3, -32 - xvst $xr0, $a3, 0 - xvst $xr0, $a3, 32 - xvst $xr0, $a3, 64 - xvst $xr0, $a3, 96 - xvst $xr0, $a3, 128 - xvst $xr0, $a3, 160 - xvst $xr0, $a3, 192 - xvst $xr0, $a3, 224 - xvst $xr0, $a3, 256 - xvst $xr0, $a3, 288 - xvst $xr0, $a3, 320 - xvst $xr0, $a3, 352 - xvst $xr0, $a3, 384 - xvst $xr0, $a3, 416 - xvst $xr0, $a3, 448 - xvst $xr0, $a3, 480 - addi.w $a4, $a4, 1 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 1024 - bnez $a5, .LBB13_15 -# %bb.16: # %.preheader34.i47.preheader - lu12i.w $a3, 284 - ori $a3, $a3, 1888 - add.d $a2, $a2, $a3 + lu12i.w $a2, 220 + ori $a2, $a2, 1808 + add.d $a2, $a1, $a2 ori $a3, $zero, 1 ori $a4, $zero, 256 .p2align 4, , 16 -.LBB13_17: # %.preheader34.i47 +.LBB13_15: # %.preheader34.i39 # =>This Inner Loop Header: Depth=1 bstrpick.d $a5, $a3, 31, 0 movgr2fr.d $fa0, $a5 @@ -12325,45 +12165,97 @@ set: # @set addi.w $a3, $a3, 1 addi.d $a4, $a4, -1 addi.d 
$a2, $a2, 1024 - bnez $a4, .LBB13_17 + bnez $a4, .LBB13_15 +# %bb.16: # %.preheader34.i47.preheader + lu12i.w $a2, 284 + ori $a2, $a2, 1888 + add.d $a1, $a1, $a2 + ori $a2, $zero, 1 + ori $a3, $zero, 256 + .p2align 4, , 16 +.LBB13_17: # %.preheader34.i47 + # =>This Inner Loop Header: Depth=1 + bstrpick.d $a4, $a2, 31, 0 + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + frecip.d $fa0, $fa0 + fcvt.s.d $fa0, $fa0 + xvreplve0.w $xr0, $xr0 + xvst $xr0, $a1, -512 + xvst $xr0, $a1, -480 + xvst $xr0, $a1, -448 + xvst $xr0, $a1, -416 + xvst $xr0, $a1, -384 + xvst $xr0, $a1, -352 + xvst $xr0, $a1, -320 + xvst $xr0, $a1, -288 + xvst $xr0, $a1, -256 + xvst $xr0, $a1, -224 + xvst $xr0, $a1, -192 + xvst $xr0, $a1, -160 + xvst $xr0, $a1, -128 + xvst $xr0, $a1, -96 + xvst $xr0, $a1, -64 + xvst $xr0, $a1, -32 + xvst $xr0, $a1, 0 + xvst $xr0, $a1, 32 + xvst $xr0, $a1, 64 + xvst $xr0, $a1, 96 + xvst $xr0, $a1, 128 + xvst $xr0, $a1, 160 + xvst $xr0, $a1, 192 + xvst $xr0, $a1, 224 + xvst $xr0, $a1, 256 + xvst $xr0, $a1, 288 + xvst $xr0, $a1, 320 + xvst $xr0, $a1, 352 + xvst $xr0, $a1, 384 + xvst $xr0, $a1, 416 + xvst $xr0, $a1, 448 + xvst $xr0, $a1, 480 + addi.w $a2, $a2, 1 + addi.d $a3, $a3, -1 + addi.d $a1, $a1, 1024 + bnez $a3, .LBB13_17 # %bb.18: # %vector.body119.preheader - pcalau12i $a2, %pc_hi20(.LCPI13_0) - xvld $xr0, $a2, %pc_lo12(.LCPI13_0) - pcalau12i $a2, %pc_hi20(.LCPI13_1) - xvld $xr1, $a2, %pc_lo12(.LCPI13_1) - pcalau12i $a2, %pc_hi20(indx) - addi.d $a2, $a2, %pc_lo12(indx) - ori $a1, $a1, 3328 + pcalau12i $a1, %pc_hi20(.LCPI13_0) + xvld $xr0, $a1, %pc_lo12(.LCPI13_0) + pcalau12i $a1, %pc_hi20(.LCPI13_1) + xvld $xr1, $a1, %pc_lo12(.LCPI13_1) + pcalau12i $a1, %pc_hi20(indx) + addi.d $a1, $a1, %pc_lo12(indx) + ori $a0, $a0, 3328 xvrepli.w $xr2, 3 .p2align 4, , 16 .LBB13_19: # %vector.body119 # =>This Inner Loop Header: Depth=1 - xvpickve2gr.d $a3, $xr1, 0 - xvinsgr2vr.w $xr3, $a3, 0 - xvpickve2gr.d $a3, $xr1, 1 - xvinsgr2vr.w $xr3, $a3, 1 - xvpickve2gr.d $a3, $xr1, 2 - xvinsgr2vr.w $xr3, $a3, 2 - xvpickve2gr.d $a3, $xr1, 3 - xvinsgr2vr.w $xr3, $a3, 3 - xvpickve2gr.d $a3, $xr0, 0 - xvinsgr2vr.w $xr3, $a3, 4 - xvpickve2gr.d $a3, $xr0, 1 - xvinsgr2vr.w $xr3, $a3, 5 - xvpickve2gr.d $a3, $xr0, 2 - xvinsgr2vr.w $xr3, $a3, 6 - xvpickve2gr.d $a3, $xr0, 3 - xvinsgr2vr.w $xr3, $a3, 7 + xvpickve2gr.d $a2, $xr1, 0 + xvinsgr2vr.w $xr3, $a2, 0 + xvpickve2gr.d $a2, $xr1, 1 + xvinsgr2vr.w $xr3, $a2, 1 + xvpickve2gr.d $a2, $xr1, 2 + xvinsgr2vr.w $xr3, $a2, 2 + xvpickve2gr.d $a2, $xr1, 3 + xvinsgr2vr.w $xr3, $a2, 3 + xvpickve2gr.d $a2, $xr0, 0 + xvinsgr2vr.w $xr3, $a2, 4 + xvpickve2gr.d $a2, $xr0, 1 + xvinsgr2vr.w $xr3, $a2, 5 + xvpickve2gr.d $a2, $xr0, 2 + xvinsgr2vr.w $xr3, $a2, 6 + xvpickve2gr.d $a2, $xr0, 3 + xvinsgr2vr.w $xr3, $a2, 7 xvaddi.wu $xr3, $xr3, 1 xvand.v $xr3, $xr3, $xr2 xvaddi.wu $xr3, $xr3, 1 - xvst $xr3, $a2, 0 + xvst $xr3, $a1, 0 xvaddi.du $xr1, $xr1, 8 xvaddi.du $xr0, $xr0, 8 - addi.d $a1, $a1, -8 - addi.d $a2, $a2, 32 - bnez $a1, .LBB13_19 + addi.d $a0, $a0, -8 + addi.d $a1, $a1, 32 + bnez $a0, .LBB13_19 # %bb.20: # %middle.block122 + lu12i.w $a0, 260096 st.w $a0, $s0, 0 lu12i.w $a0, 262144 st.w $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/VersaBench/8b10b/CMakeFiles/8b10b.dir/calc.s b/results/MultiSource/Benchmarks/VersaBench/8b10b/CMakeFiles/8b10b.dir/calc.s index 4c71823e..4a06b9cb 100644 --- a/results/MultiSource/Benchmarks/VersaBench/8b10b/CMakeFiles/8b10b.dir/calc.s +++ b/results/MultiSource/Benchmarks/VersaBench/8b10b/CMakeFiles/8b10b.dir/calc.s @@ -139,7 +139,7 @@ 
bigTableSetup: # @bigTableSetup pcalau12i $a2, %pc_hi20(lookupTable3B) addi.d $a2, $a2, %pc_lo12(lookupTable3B) lu12i.w $a3, 16 - xvreplgr2vr.w $xr3, $a3 + xvldi $xr3, -3583 xvrepli.w $xr4, 29 pcalau12i $a4, %pc_hi20(bigTable) addi.d $a4, $a4, %pc_lo12(bigTable) diff --git a/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/des_enc.s b/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/des_enc.s index f3853570..f85544f3 100644 --- a/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/des_enc.s +++ b/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/des_enc.s @@ -847,9 +847,7 @@ des_encrypt2: # @des_encrypt2 vsrli.d $vr1, $vr0, 3 vslli.d $vr0, $vr0, 29 vadd.d $vr0, $vr1, $vr0 - addi.w $a1, $zero, -1 - lu32i.d $a1, 0 - vreplgr2vr.d $vr1, $a1 + vldi $vr1, -1777 vand.v $vr0, $vr0, $vr1 vst $vr0, $a0, 0 ld.d $s3, $sp, 8 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/set_key.s b/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/set_key.s index c4e888a4..f95392d6 100644 --- a/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/set_key.s +++ b/results/MultiSource/Benchmarks/VersaBench/ecbdes/CMakeFiles/ecbdes.dir/set_key.s @@ -267,16 +267,14 @@ des_set_key: # @des_set_key ori $a3, $a3, 259 pcalau12i $a4, %pc_hi20(des_skb) addi.d $a4, $a4, %pc_lo12(des_skb) + pcalau12i $a5, %pc_hi20(.LCPI2_0) + vld $vr0, $a5, %pc_lo12(.LCPI2_0) + pcalau12i $a5, %pc_hi20(.LCPI2_1) + vld $vr1, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_2) + vld $vr2, $a5, %pc_lo12(.LCPI2_2) move $a5, $zero - lu12i.w $a6, -16 - pcalau12i $t0, %pc_hi20(.LCPI2_0) - vld $vr0, $t0, %pc_lo12(.LCPI2_0) - pcalau12i $t0, %pc_hi20(.LCPI2_1) - vld $vr1, $t0, %pc_lo12(.LCPI2_1) - pcalau12i $t0, %pc_hi20(.LCPI2_2) - vld $vr2, $t0, %pc_lo12(.LCPI2_2) - lu32i.d $a6, 0 - vreplgr2vr.d $vr3, $a6 + vldi $vr3, -1780 ori $a6, $zero, 16 .p2align 4, , 16 .LBB2_13: # =>This Inner Loop Header: Depth=1 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s index cf291bf3..5b80ff8e 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s @@ -1497,17 +1497,16 @@ H__align: # @H__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI2_1) - xvld $xr1, $a5, %pc_lo12(.LCPI2_1) + xvld $xr0, $a5, %pc_lo12(.LCPI2_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $s1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_53: # %vector.body933 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -1565,7 +1564,7 @@ H__align: # @H__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB2_53 @@ -1611,17 +1610,16 @@ H__align: # @H__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI2_1) - xvld $xr1, $a5, %pc_lo12(.LCPI2_1) + xvld $xr0, $a5, %pc_lo12(.LCPI2_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $s2, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, 
$a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_60: # %vector.body948 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -1679,7 +1677,7 @@ H__align: # @H__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB2_60 @@ -2637,20 +2635,19 @@ H__align: # @H__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - xvreplgr2vr.d $xr1, $s5 pcalau12i $a5, %pc_hi20(.LCPI2_3) - xvld $xr2, $a5, %pc_lo12(.LCPI2_3) + xvld $xr1, $a5, %pc_lo12(.LCPI2_3) pcalau12i $a5, %pc_hi20(.LCPI2_4) - xvld $xr3, $a5, %pc_lo12(.LCPI2_4) + xvld $xr2, $a5, %pc_lo12(.LCPI2_4) + xvreplgr2vr.d $xr3, $s5 addi.d $a5, $s1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr4, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_178: # %vector.body1058 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr1, $xr2 - xvsub.d $xr6, $xr1, $xr3 + xvsub.d $xr5, $xr3, $xr1 + xvsub.d $xr6, $xr3, $xr2 xvpickve2gr.d $a7, $xr6, 0 xvinsgr2vr.w $xr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -2725,8 +2722,8 @@ H__align: # @H__align fcvt.s.d $fa5, $fa5 xvinsve0.w $xr8, $xr5, 7 xvst $xr8, $a5, 0 - xvaddi.du $xr3, $xr3, 8 xvaddi.du $xr2, $xr2, 8 + xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB2_178 @@ -2781,17 +2778,16 @@ H__align: # @H__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - xvreplve0.d $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI2_5) - vld $vr4, $a5, %pc_lo12(.LCPI2_5) + vld $vr3, $a5, %pc_lo12(.LCPI2_5) + xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr5, $a6 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_185: # %vector.body1077 # =>This Inner Loop Header: Depth=1 - vext2xv.du.wu $xr6, $xr4 + vext2xv.du.wu $xr6, $xr3 vld $vr7, $a5, 0 xvffint.d.lu $xr6, $xr6 xvfmul.d $xr6, $xr6, $xr5 @@ -2807,7 +2803,7 @@ H__align: # @H__align vreplvei.w $vr7, $vr7, 3 fcvt.d.s $fa7, $fa7 xvinsve0.d $xr9, $xr7, 3 - xvfmadd.d $xr6, $xr3, $xr6, $xr9 + xvfmadd.d $xr6, $xr4, $xr6, $xr9 xvpickve.d $xr7, $xr6, 1 fcvt.s.d $fa7, $fa7 xvpickve.d $xr8, $xr6, 0 @@ -2820,7 +2816,7 @@ H__align: # @H__align fcvt.s.d $fa6, $fa6 vextrins.w $vr8, $vr6, 48 vst $vr8, $a5, 0 - vaddi.wu $vr4, $vr4, 4 + vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 bnez $a6, .LBB2_185 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s index 1367c1b9..23447cc0 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s @@ -241,13 +241,13 @@ Lalignmm_hmout: # @Lalignmm_hmout ori $a1, $zero, 8 bltu $a0, $a1, .LBB0_14 # %bb.12: # %vector.memcheck - ld.d $a4, $sp, 384 # 8-byte Folded Reload - alsl.d $a1, $a0, $a4, 2 - ld.d $a3, $sp, 392 # 8-byte Folded Reload - bgeu $a3, $a1, .LBB0_67 -# %bb.13: # %vector.memcheck + ld.d $a3, $sp, 384 # 8-byte Folded Reload alsl.d $a1, $a0, $a3, 2 - bgeu $a4, $a1, .LBB0_67 + ld.d $a2, $sp, 392 # 8-byte Folded Reload + bgeu $a2, $a1, .LBB0_67 +# %bb.13: # %vector.memcheck + alsl.d $a1, $a0, $a2, 2 + bgeu $a3, $a1, .LBB0_67 .LBB0_14: move $a1, $zero .LBB0_15: # %scalar.ph.preheader @@ -287,10 +287,10 @@ Lalignmm_hmout: # @Lalignmm_hmout bltu $a0, $a1, .LBB0_21 
# %bb.19: # %vector.memcheck258 alsl.d $a1, $a0, $a5, 2 - ld.d $a3, $sp, 160 # 8-byte Folded Reload - bgeu $a3, $a1, .LBB0_70 + ld.d $a2, $sp, 160 # 8-byte Folded Reload + bgeu $a2, $a1, .LBB0_70 # %bb.20: # %vector.memcheck258 - alsl.d $a1, $a0, $a3, 2 + alsl.d $a1, $a0, $a2, 2 bgeu $a5, $a1, .LBB0_70 .LBB0_21: move $a1, $zero @@ -736,12 +736,8 @@ Lalignmm_hmout: # @Lalignmm_hmout bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr2, $a2 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 - move $a2, $a3 - move $a3, $a4 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a4, $a1 .p2align 4, , 16 .LBB0_68: # %vector.body @@ -865,11 +861,8 @@ Lalignmm_hmout: # @Lalignmm_hmout bstrpick.d $a1, $s1, 30, 3 slli.d $a1, $a1, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr2, $a2 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 - move $a2, $a3 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a3, $a5 move $a4, $a1 .p2align 4, , 16 @@ -2939,13 +2932,13 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout ori $a1, $zero, 8 bltu $a0, $a1, .LBB1_15 # %bb.13: # %vector.memcheck - ld.d $a4, $sp, 384 # 8-byte Folded Reload - alsl.d $a1, $a0, $a4, 2 - ld.d $a3, $sp, 392 # 8-byte Folded Reload - bgeu $a3, $a1, .LBB1_68 -# %bb.14: # %vector.memcheck + ld.d $a3, $sp, 384 # 8-byte Folded Reload alsl.d $a1, $a0, $a3, 2 - bgeu $a4, $a1, .LBB1_68 + ld.d $a2, $sp, 392 # 8-byte Folded Reload + bgeu $a2, $a1, .LBB1_68 +# %bb.14: # %vector.memcheck + alsl.d $a1, $a0, $a2, 2 + bgeu $a3, $a1, .LBB1_68 .LBB1_15: move $a1, $zero .LBB1_16: # %scalar.ph.preheader @@ -2986,10 +2979,10 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout bltu $a0, $a1, .LBB1_22 # %bb.20: # %vector.memcheck262 alsl.d $a1, $a0, $a5, 2 - ld.d $a3, $sp, 152 # 8-byte Folded Reload - bgeu $a3, $a1, .LBB1_71 + ld.d $a2, $sp, 152 # 8-byte Folded Reload + bgeu $a2, $a1, .LBB1_71 # %bb.21: # %vector.memcheck262 - alsl.d $a1, $a0, $a3, 2 + alsl.d $a1, $a0, $a2, 2 bgeu $a5, $a1, .LBB1_71 .LBB1_22: move $a1, $zero @@ -3435,12 +3428,8 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr2, $a2 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 - move $a2, $a3 - move $a3, $a4 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a4, $a1 .p2align 4, , 16 .LBB1_69: # %vector.body @@ -3564,11 +3553,8 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout bstrpick.d $a1, $s0, 30, 3 slli.d $a1, $a1, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr2, $a2 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 - move $a2, $a3 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a3, $a5 move $a4, $a1 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s index a109b0c5..0eb98fc4 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s @@ -474,10 +474,8 @@ MSalignmm: # @MSalignmm bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a2, $zero, 1023 - xvreplgr2vr.d $xr2, $a2 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a2, $a7 move $a3, $a5 move $a4, $a1 @@ -604,10 +602,8 @@ MSalignmm: # @MSalignmm bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a2, $zero, 1023 - 
xvreplgr2vr.d $xr2, $a2 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr3, $a2 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a2, $s0 move $a3, $a6 move $a4, $a1 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s index 80e5d54f..ba8fe4f8 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s @@ -1787,12 +1787,9 @@ Q__align: # @Q__align bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $t3, $zero, 1023 - xvreplgr2vr.d $xr2, $t3 - lu52i.d $t3, $zero, 1022 - xvreplgr2vr.d $xr3, $t3 - lu12i.w $t3, 260096 - xvreplgr2vr.w $xr4, $t3 + xvldi $xr2, -912 + xvldi $xr3, -928 + xvldi $xr4, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -2190,12 +2187,9 @@ Q__align: # @Q__align bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $t3, $zero, 1023 - xvreplgr2vr.d $xr2, $t3 - lu52i.d $t3, $zero, 1022 - xvreplgr2vr.d $xr3, $t3 - lu12i.w $t3, 260096 - xvreplgr2vr.w $xr4, $t3 + xvldi $xr2, -912 + xvldi $xr3, -928 + xvldi $xr4, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -2652,17 +2646,16 @@ Q__align: # @Q__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI3_1) - xvld $xr1, $a5, %pc_lo12(.LCPI3_1) + xvld $xr0, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $t3, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_135: # %vector.body1099 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -2720,7 +2713,7 @@ Q__align: # @Q__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB3_135 @@ -2772,17 +2765,16 @@ Q__align: # @Q__align ori $a6, $zero, 1 move $a4, $a3 bstrins.d $a4, $a6, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a6, %pc_hi20(.LCPI3_1) - xvld $xr1, $a6, %pc_lo12(.LCPI3_1) + xvld $xr0, $a6, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a6, $a1, 4 - lu52i.d $a7, $zero, -1026 - xvreplgr2vr.d $xr2, $a7 + xvldi $xr2, -800 move $a7, $a5 .p2align 4, , 16 .LBB3_143: # %vector.body1114 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -2840,7 +2832,7 @@ Q__align: # @Q__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a6, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a7, $a7, -8 addi.d $a6, $a6, 32 bnez $a7, .LBB3_143 @@ -3533,20 +3525,19 @@ Q__align: # @Q__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - xvreplgr2vr.d $xr1, $s8 pcalau12i $a5, %pc_hi20(.LCPI3_3) - xvld $xr2, $a5, %pc_lo12(.LCPI3_3) + xvld $xr1, $a5, %pc_lo12(.LCPI3_3) pcalau12i $a5, %pc_hi20(.LCPI3_4) - xvld $xr3, $a5, %pc_lo12(.LCPI3_4) + xvld $xr2, $a5, %pc_lo12(.LCPI3_4) + xvreplgr2vr.d $xr3, $s8 addi.d $a5, $t3, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr4, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_216: # %vector.body1300 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr1, $xr2 - xvsub.d $xr6, $xr1, $xr3 + xvsub.d $xr5, $xr3, $xr1 + xvsub.d $xr6, $xr3, $xr2 xvpickve2gr.d $a7, $xr6, 0 
xvinsgr2vr.w $xr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -3621,8 +3612,8 @@ Q__align: # @Q__align fcvt.s.d $fa5, $fa5 xvinsve0.w $xr8, $xr5, 7 xvst $xr8, $a5, 0 - xvaddi.du $xr3, $xr3, 8 xvaddi.du $xr2, $xr2, 8 + xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB3_216 @@ -3676,17 +3667,16 @@ Q__align: # @Q__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - xvreplve0.d $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI3_5) - vld $vr4, $a5, %pc_lo12(.LCPI3_5) + vld $vr3, $a5, %pc_lo12(.LCPI3_5) + xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr5, $a6 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_223: # %vector.body1319 # =>This Inner Loop Header: Depth=1 - vext2xv.du.wu $xr6, $xr4 + vext2xv.du.wu $xr6, $xr3 vld $vr7, $a5, 0 xvffint.d.lu $xr6, $xr6 xvfmul.d $xr6, $xr6, $xr5 @@ -3702,7 +3692,7 @@ Q__align: # @Q__align vreplvei.w $vr7, $vr7, 3 fcvt.d.s $fa7, $fa7 xvinsve0.d $xr9, $xr7, 3 - xvfmadd.d $xr6, $xr3, $xr6, $xr9 + xvfmadd.d $xr6, $xr4, $xr6, $xr9 xvpickve.d $xr7, $xr6, 1 fcvt.s.d $fa7, $fa7 xvpickve.d $xr8, $xr6, 0 @@ -3715,7 +3705,7 @@ Q__align: # @Q__align fcvt.s.d $fa6, $fa6 vextrins.w $vr8, $vr6, 48 vst $vr8, $a5, 0 - vaddi.wu $vr4, $vr4, 4 + vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 bnez $a6, .LBB3_223 @@ -6840,12 +6830,9 @@ Q__align_gapmap: # @Q__align_gapmap bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $t3, $zero, 1023 - xvreplgr2vr.d $xr2, $t3 - lu52i.d $t3, $zero, 1022 - xvreplgr2vr.d $xr3, $t3 - lu12i.w $t3, 260096 - xvreplgr2vr.w $xr4, $t3 + xvldi $xr2, -912 + xvldi $xr3, -928 + xvldi $xr4, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -7243,12 +7230,9 @@ Q__align_gapmap: # @Q__align_gapmap bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $t3, $zero, 1023 - xvreplgr2vr.d $xr2, $t3 - lu52i.d $t3, $zero, 1022 - xvreplgr2vr.d $xr3, $t3 - lu12i.w $t3, 260096 - xvreplgr2vr.w $xr4, $t3 + xvldi $xr2, -912 + xvldi $xr3, -928 + xvldi $xr4, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -7697,17 +7681,16 @@ Q__align_gapmap: # @Q__align_gapmap ori $a6, $zero, 1 move $a4, $a3 bstrins.d $a4, $a6, 2, 0 - xvreplgr2vr.w $xr0, $a1 pcalau12i $a6, %pc_hi20(.LCPI6_1) - xvld $xr1, $a6, %pc_lo12(.LCPI6_1) + xvld $xr0, $a6, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr1, $a1 addi.d $a6, $t5, 4 - lu52i.d $a7, $zero, -1026 - xvreplgr2vr.d $xr2, $a7 + xvldi $xr2, -800 move $a7, $a5 .p2align 4, , 16 .LBB6_132: # %vector.body1043 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -7765,7 +7748,7 @@ Q__align_gapmap: # @Q__align_gapmap fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a6, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a7, $a7, -8 addi.d $a6, $a6, 32 bnez $a7, .LBB6_132 @@ -7810,17 +7793,16 @@ Q__align_gapmap: # @Q__align_gapmap ori $a7, $zero, 1 move $a5, $a4 bstrins.d $a5, $a7, 2, 0 - xvreplgr2vr.w $xr0, $a1 pcalau12i $a7, %pc_hi20(.LCPI6_1) - xvld $xr1, $a7, %pc_lo12(.LCPI6_1) + xvld $xr0, $a7, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr1, $a1 addi.d $a7, $a2, 4 - lu52i.d $t0, $zero, -1026 - xvreplgr2vr.d $xr2, $t0 + xvldi $xr2, -800 move $t0, $a6 .p2align 4, , 16 .LBB6_139: # %vector.body1057 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -7878,7 +7860,7 @@ 
Q__align_gapmap: # @Q__align_gapmap fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a7, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $t0, $t0, -8 addi.d $a7, $a7, 32 bnez $t0, .LBB6_139 @@ -8461,21 +8443,20 @@ Q__align_gapmap: # @Q__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - ld.d $a5, $sp, 344 # 8-byte Folded Reload - xvreplgr2vr.d $xr1, $a5 pcalau12i $a5, %pc_hi20(.LCPI6_3) - xvld $xr2, $a5, %pc_lo12(.LCPI6_3) + xvld $xr1, $a5, %pc_lo12(.LCPI6_3) pcalau12i $a5, %pc_hi20(.LCPI6_4) - xvld $xr3, $a5, %pc_lo12(.LCPI6_4) + xvld $xr2, $a5, %pc_lo12(.LCPI6_4) + ld.d $a5, $sp, 344 # 8-byte Folded Reload + xvreplgr2vr.d $xr3, $a5 addi.d $a5, $t5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr4, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_197: # %vector.body1179 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr1, $xr2 - xvsub.d $xr6, $xr1, $xr3 + xvsub.d $xr5, $xr3, $xr1 + xvsub.d $xr6, $xr3, $xr2 xvpickve2gr.d $a7, $xr6, 0 xvinsgr2vr.w $xr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -8550,8 +8531,8 @@ Q__align_gapmap: # @Q__align_gapmap fcvt.s.d $fa5, $fa5 xvinsve0.w $xr8, $xr5, 7 xvst $xr8, $a5, 0 - xvaddi.du $xr3, $xr3, 8 xvaddi.du $xr2, $xr2, 8 + xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB6_197 @@ -8606,17 +8587,16 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - xvreplve0.d $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI6_5) - vld $vr4, $a5, %pc_lo12(.LCPI6_5) + vld $vr3, $a5, %pc_lo12(.LCPI6_5) + xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr5, $a6 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_204: # %vector.body1198 # =>This Inner Loop Header: Depth=1 - vext2xv.du.wu $xr6, $xr4 + vext2xv.du.wu $xr6, $xr3 vld $vr7, $a5, 0 xvffint.d.lu $xr6, $xr6 xvfmul.d $xr6, $xr6, $xr5 @@ -8632,7 +8612,7 @@ Q__align_gapmap: # @Q__align_gapmap vreplvei.w $vr7, $vr7, 3 fcvt.d.s $fa7, $fa7 xvinsve0.d $xr9, $xr7, 3 - xvfmadd.d $xr6, $xr3, $xr6, $xr9 + xvfmadd.d $xr6, $xr4, $xr6, $xr9 xvpickve.d $xr7, $xr6, 1 fcvt.s.d $fa7, $fa7 xvpickve.d $xr8, $xr6, 0 @@ -8645,7 +8625,7 @@ Q__align_gapmap: # @Q__align_gapmap fcvt.s.d $fa6, $fa6 vextrins.w $vr8, $vr6, 48 vst $vr8, $a5, 0 - vaddi.wu $vr4, $vr4, 4 + vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 bnez $a6, .LBB6_204 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s index 33a95e7c..c57537d1 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s @@ -1624,17 +1624,16 @@ R__align: # @R__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI2_1) - xvld $xr1, $a5, %pc_lo12(.LCPI2_1) + xvld $xr0, $a5, %pc_lo12(.LCPI2_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $t2, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_56: # %vector.body826 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -1692,7 +1691,7 @@ R__align: # @R__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 
bnez $a6, .LBB2_56 @@ -1736,17 +1735,16 @@ R__align: # @R__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI2_1) - xvld $xr1, $a5, %pc_lo12(.LCPI2_1) + xvld $xr0, $a5, %pc_lo12(.LCPI2_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $t7, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_63: # %vector.body841 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -1804,7 +1802,7 @@ R__align: # @R__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB2_63 @@ -2648,20 +2646,19 @@ R__align: # @R__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - xvreplgr2vr.d $xr1, $t4 pcalau12i $a5, %pc_hi20(.LCPI2_3) - xvld $xr2, $a5, %pc_lo12(.LCPI2_3) + xvld $xr1, $a5, %pc_lo12(.LCPI2_3) pcalau12i $a5, %pc_hi20(.LCPI2_4) - xvld $xr3, $a5, %pc_lo12(.LCPI2_4) + xvld $xr2, $a5, %pc_lo12(.LCPI2_4) + xvreplgr2vr.d $xr3, $t4 addi.d $a5, $t2, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr4, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_134: # %vector.body951 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr1, $xr2 - xvsub.d $xr6, $xr1, $xr3 + xvsub.d $xr5, $xr3, $xr1 + xvsub.d $xr6, $xr3, $xr2 xvpickve2gr.d $a7, $xr6, 0 xvinsgr2vr.w $xr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -2736,8 +2733,8 @@ R__align: # @R__align fcvt.s.d $fa5, $fa5 xvinsve0.w $xr8, $xr5, 7 xvst $xr8, $a5, 0 - xvaddi.du $xr3, $xr3, 8 xvaddi.du $xr2, $xr2, 8 + xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB2_134 @@ -2791,17 +2788,16 @@ R__align: # @R__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - xvreplve0.d $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI2_5) - vld $vr4, $a5, %pc_lo12(.LCPI2_5) + vld $vr3, $a5, %pc_lo12(.LCPI2_5) + xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr5, $a6 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_141: # %vector.body970 # =>This Inner Loop Header: Depth=1 - vext2xv.du.wu $xr6, $xr4 + vext2xv.du.wu $xr6, $xr3 vld $vr7, $a5, 0 xvffint.d.lu $xr6, $xr6 xvfmul.d $xr6, $xr6, $xr5 @@ -2817,7 +2813,7 @@ R__align: # @R__align vreplvei.w $vr7, $vr7, 3 fcvt.d.s $fa7, $fa7 xvinsve0.d $xr9, $xr7, 3 - xvfmadd.d $xr6, $xr3, $xr6, $xr9 + xvfmadd.d $xr6, $xr4, $xr6, $xr9 xvpickve.d $xr7, $xr6, 1 fcvt.s.d $fa7, $fa7 xvpickve.d $xr8, $xr6, 0 @@ -2830,7 +2826,7 @@ R__align: # @R__align fcvt.s.d $fa6, $fa6 vextrins.w $vr8, $vr6, 48 vst $vr8, $a5, 0 - vaddi.wu $vr4, $vr4, 4 + vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 bnez $a6, .LBB2_141 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s index 422aea62..aece87dd 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s @@ -293,8 +293,7 @@ Aalign: # @Aalign xvreplve0.d $xr1, $xr0 ld.d $a5, $sp, 80 # 8-byte Folded Reload addi.d $a5, $a5, 4 - lu52i.d $a6, $zero, 1022 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -928 move $a6, $a4 .p2align 4, , 16 .LBB0_24: # %vector.body318 @@ -391,8 +390,7 @@ Aalign: # @Aalign bstrins.d $a3, $a5, 2, 0 xvreplve0.d 
$xr1, $xr0 addi.d $a5, $s7, 4 - lu52i.d $a6, $zero, 1022 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -928 move $a6, $a4 .p2align 4, , 16 .LBB0_31: # %vector.body332 @@ -535,8 +533,7 @@ Aalign: # @Aalign slli.d $a3, $a3, 3 vext2xv.d.w $xr0, $xr0 xvffint.d.l $xr0, $xr0 - lu52i.d $a5, $zero, 1022 - xvreplgr2vr.d $xr1, $a5 + xvldi $xr1, -928 xvrepli.b $xr2, 0 move $a5, $a1 move $a6, $a2 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s index b8424e8b..8fa5fb4a 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s @@ -1232,17 +1232,16 @@ A__align: # @A__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI3_1) - xvld $xr1, $a5, %pc_lo12(.LCPI3_1) + xvld $xr0, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $s5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_52: # %vector.body605 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -1300,7 +1299,7 @@ A__align: # @A__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB3_52 @@ -1417,18 +1416,17 @@ A__align: # @A__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI3_1) - xvld $xr1, $a5, %pc_lo12(.LCPI3_1) + xvld $xr0, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr1, $a0 ld.d $a5, $sp, 344 # 8-byte Folded Reload addi.d $a5, $a5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_68: # %vector.body620 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -1486,7 +1484,7 @@ A__align: # @A__align fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB3_68 @@ -1900,20 +1898,19 @@ A__align: # @A__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - xvreplgr2vr.d $xr1, $t3 pcalau12i $a5, %pc_hi20(.LCPI3_2) - xvld $xr2, $a5, %pc_lo12(.LCPI3_2) + xvld $xr1, $a5, %pc_lo12(.LCPI3_2) pcalau12i $a5, %pc_hi20(.LCPI3_3) - xvld $xr3, $a5, %pc_lo12(.LCPI3_3) + xvld $xr2, $a5, %pc_lo12(.LCPI3_3) + xvreplgr2vr.d $xr3, $t3 addi.d $a5, $s5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr4, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_112: # %vector.body844 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr1, $xr2 - xvsub.d $xr6, $xr1, $xr3 + xvsub.d $xr5, $xr3, $xr1 + xvsub.d $xr6, $xr3, $xr2 xvpickve2gr.d $a7, $xr6, 0 xvinsgr2vr.w $xr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -1988,8 +1985,8 @@ A__align: # @A__align fcvt.s.d $fa5, $fa5 xvinsve0.w $xr8, $xr5, 7 xvst $xr8, $a5, 0 - xvaddi.du $xr3, $xr3, 8 xvaddi.du $xr2, $xr2, 8 + xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB3_112 @@ -2044,17 +2041,16 @@ A__align: # @A__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - xvreplve0.d $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI3_4) - vld $vr4, 
$a5, %pc_lo12(.LCPI3_4) + vld $vr3, $a5, %pc_lo12(.LCPI3_4) + xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr5, $a6 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_119: # %vector.body863 # =>This Inner Loop Header: Depth=1 - vext2xv.du.wu $xr6, $xr4 + vext2xv.du.wu $xr6, $xr3 vld $vr7, $a5, 0 xvffint.d.lu $xr6, $xr6 xvfmul.d $xr6, $xr6, $xr5 @@ -2070,7 +2066,7 @@ A__align: # @A__align vreplvei.w $vr7, $vr7, 3 fcvt.d.s $fa7, $fa7 xvinsve0.d $xr9, $xr7, 3 - xvfmadd.d $xr6, $xr3, $xr6, $xr9 + xvfmadd.d $xr6, $xr4, $xr6, $xr9 xvpickve.d $xr7, $xr6, 1 fcvt.s.d $fa7, $fa7 xvpickve.d $xr8, $xr6, 0 @@ -2083,7 +2079,7 @@ A__align: # @A__align fcvt.s.d $fa6, $fa6 vextrins.w $vr8, $vr6, 48 vst $vr8, $a5, 0 - vaddi.wu $vr4, $vr4, 4 + vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 bnez $a6, .LBB3_119 @@ -2314,10 +2310,8 @@ A__align: # @A__align bstrpick.d $a3, $s2, 30, 3 slli.d $a3, $a3, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr2, $a4 - lu52i.d $a4, $zero, 1022 - xvreplgr2vr.d $xr3, $a4 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 @@ -2443,10 +2437,8 @@ A__align: # @A__align bstrpick.d $a3, $s0, 30, 3 slli.d $a3, $a3, 3 xvreplve0.d $xr1, $xr0 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr2, $a4 - lu52i.d $a4, $zero, 1022 - xvreplgr2vr.d $xr3, $a4 + xvldi $xr2, -912 + xvldi $xr3, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 @@ -5078,17 +5070,16 @@ A__align_gapmap: # @A__align_gapmap ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI6_1) - xvld $xr1, $a5, %pc_lo12(.LCPI6_1) + xvld $xr0, $a5, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr1, $a0 addi.d $a5, $s5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_49: # %vector.body544 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -5146,7 +5137,7 @@ A__align_gapmap: # @A__align_gapmap fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB6_49 @@ -5262,18 +5253,17 @@ A__align_gapmap: # @A__align_gapmap ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplgr2vr.w $xr0, $a0 pcalau12i $a5, %pc_hi20(.LCPI6_1) - xvld $xr1, $a5, %pc_lo12(.LCPI6_1) + xvld $xr0, $a5, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr1, $a0 ld.d $a5, $sp, 280 # 8-byte Folded Reload addi.d $a5, $a5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr2, $a6 + xvldi $xr2, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_65: # %vector.body558 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr0, $xr1 + xvmul.w $xr3, $xr1, $xr0 xvpermi.q $xr4, $xr3, 1 vext2xv.d.w $xr4, $xr4 xvffint.d.l $xr4, $xr4 @@ -5331,7 +5321,7 @@ A__align_gapmap: # @A__align_gapmap fcvt.s.d $fa3, $fa3 xvinsve0.w $xr6, $xr3, 7 xvst $xr6, $a5, 0 - xvaddi.wu $xr1, $xr1, 8 + xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB6_65 @@ -5642,21 +5632,20 @@ A__align_gapmap: # @A__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - ld.d $a5, $sp, 216 # 8-byte Folded Reload - xvreplgr2vr.d $xr1, $a5 pcalau12i $a5, %pc_hi20(.LCPI6_2) - xvld $xr2, $a5, %pc_lo12(.LCPI6_2) + xvld $xr1, $a5, %pc_lo12(.LCPI6_2) pcalau12i $a5, %pc_hi20(.LCPI6_3) - xvld $xr3, $a5, %pc_lo12(.LCPI6_3) + xvld $xr2, $a5, 
%pc_lo12(.LCPI6_3) + ld.d $a5, $sp, 216 # 8-byte Folded Reload + xvreplgr2vr.d $xr3, $a5 addi.d $a5, $s5, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr4, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_97: # %vector.body718 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr1, $xr2 - xvsub.d $xr6, $xr1, $xr3 + xvsub.d $xr5, $xr3, $xr1 + xvsub.d $xr6, $xr3, $xr2 xvpickve2gr.d $a7, $xr6, 0 xvinsgr2vr.w $xr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -5731,8 +5720,8 @@ A__align_gapmap: # @A__align_gapmap fcvt.s.d $fa5, $fa5 xvinsve0.w $xr8, $xr5, 7 xvst $xr8, $a5, 0 - xvaddi.du $xr3, $xr3, 8 xvaddi.du $xr2, $xr2, 8 + xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB6_97 @@ -5786,17 +5775,16 @@ A__align_gapmap: # @A__align_gapmap move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - xvreplve0.d $xr3, $xr1 pcalau12i $a5, %pc_hi20(.LCPI6_4) - vld $vr4, $a5, %pc_lo12(.LCPI6_4) + vld $vr3, $a5, %pc_lo12(.LCPI6_4) + xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 - lu52i.d $a6, $zero, -1026 - xvreplgr2vr.d $xr5, $a6 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_104: # %vector.body737 # =>This Inner Loop Header: Depth=1 - vext2xv.du.wu $xr6, $xr4 + vext2xv.du.wu $xr6, $xr3 vld $vr7, $a5, 0 xvffint.d.lu $xr6, $xr6 xvfmul.d $xr6, $xr6, $xr5 @@ -5812,7 +5800,7 @@ A__align_gapmap: # @A__align_gapmap vreplvei.w $vr7, $vr7, 3 fcvt.d.s $fa7, $fa7 xvinsve0.d $xr9, $xr7, 3 - xvfmadd.d $xr6, $xr3, $xr6, $xr9 + xvfmadd.d $xr6, $xr4, $xr6, $xr9 xvpickve.d $xr7, $xr6, 1 fcvt.s.d $fa7, $fa7 xvpickve.d $xr8, $xr6, 0 @@ -5825,7 +5813,7 @@ A__align_gapmap: # @A__align_gapmap fcvt.s.d $fa6, $fa6 vextrins.w $vr8, $vr6, 48 vst $vr8, $a5, 0 - vaddi.wu $vr4, $vr4, 4 + vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 bnez $a6, .LBB6_104 @@ -6054,10 +6042,8 @@ A__align_gapmap: # @A__align_gapmap .LBB6_138: # %vector.ph bstrpick.d $a3, $s1, 30, 3 slli.d $a3, $a3, 3 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 - lu52i.d $a4, $zero, 1022 - xvreplgr2vr.d $xr1, $a4 + xvldi $xr0, -912 + xvldi $xr1, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 @@ -6179,10 +6165,8 @@ A__align_gapmap: # @A__align_gapmap ld.d $a3, $sp, 216 # 8-byte Folded Reload bstrpick.d $a3, $a3, 30, 3 slli.d $a3, $a3, 3 - lu52i.d $a4, $zero, 1023 - xvreplgr2vr.d $xr0, $a4 - lu52i.d $a4, $zero, 1022 - xvreplgr2vr.d $xr1, $a4 + xvldi $xr0, -912 + xvldi $xr1, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s index bbd8e96e..342ba8d9 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s @@ -2753,22 +2753,21 @@ constants: # @constants .LBB2_64: # %.preheader1156 ld.d $s2, $s0, 0 xvld $xr0, $s2, 0 - lu52i.d $a0, $zero, 1025 - ld.d $a1, $s0, 8 - xvreplgr2vr.d $xr1, $a0 + ld.d $a0, $s0, 8 + xvldi $xr1, -1008 xvfmul.d $xr0, $xr0, $xr1 xvst $xr0, $s2, 0 - xvld $xr0, $a1, 0 - ld.d $a0, $s0, 16 - xvfmul.d $xr0, $xr0, $xr1 - xvst $xr0, $a1, 0 xvld $xr0, $a0, 0 - ld.d $a1, $s0, 24 + ld.d $a1, $s0, 16 xvfmul.d $xr0, $xr0, $xr1 xvst $xr0, $a0, 0 xvld $xr0, $a1, 0 + ld.d $a0, $s0, 24 xvfmul.d $xr0, $xr0, $xr1 xvst $xr0, $a1, 0 + xvld $xr0, $a0, 0 + xvfmul.d $xr0, $xr0, $xr1 + xvst $xr0, $a0, 0 fld.d $fa0, $s2, 0 movgr2fr.d $fs0, $zero fcmp.ceq.d $fcc0, $fa0, $fs0 @@ -4018,10 +4017,8 @@ constants: # @constants # 
%bb.123: # %.preheader1183.preheader move $a0, $zero xvrepli.b $xr0, 0 - lu52i.d $a1, $zero, -1026 - xvreplgr2vr.d $xr1, $a1 - lu52i.d $a1, $zero, 1022 - xvreplgr2vr.d $xr2, $a1 + xvldi $xr1, -800 + xvldi $xr2, -928 vrepli.b $vr3, 0 ori $a1, $zero, 160 .p2align 4, , 16 @@ -4266,8 +4263,7 @@ constants: # @constants jirl $ra, $ra, 0 b .LBB2_528 .LBB2_129: - lu52i.d $a0, $zero, 1021 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -944 xvst $xr0, $fp, 0 vldi $vr0, -944 vldi $vr1, -944 @@ -4836,10 +4832,8 @@ constants: # @constants # %bb.153: # %.preheader1164.preheader move $a0, $zero xvrepli.b $xr0, 0 - lu52i.d $a1, $zero, -1026 - xvreplgr2vr.d $xr1, $a1 - lu52i.d $a1, $zero, 1022 - xvreplgr2vr.d $xr2, $a1 + xvldi $xr1, -800 + xvldi $xr2, -928 vrepli.b $vr3, 0 ori $a1, $zero, 160 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s index 7acfd954..a2281f4b 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s @@ -12551,8 +12551,7 @@ loadaamtx: # @loadaamtx jirl $ra, $ra, 0 .LBB76_39: # %.loopexit174 ld.d $a0, $s0, 160 - lu52i.d $a1, $zero, -1025 - xvreplgr2vr.d $xr0, $a1 + xvldi $xr0, -784 xvst $xr0, $a0, 0 xvst $xr0, $a0, 32 xvst $xr0, $a0, 64 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s index 82c6d853..d8c43f3c 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s @@ -8252,12 +8252,11 @@ veryfastsupg: # @veryfastsupg vldi $vr0, -928 pcalau12i $a5, %pc_hi20(.LCPI27_0) fld.d $fs0, $a5, %pc_lo12(.LCPI27_0) - lu52i.d $a5, $zero, 1022 - xvreplgr2vr.d $xr1, $a5 ori $a5, $zero, 0 lu32i.d $a5, -97152 lu52i.d $a5, $a5, 1042 - xvreplgr2vr.d $xr2, $a5 + xvreplgr2vr.d $xr1, $a5 + xvldi $xr2, -928 b .LBB27_4 .p2align 4, , 16 .LBB27_3: # %._crit_edge.us @@ -8287,8 +8286,8 @@ veryfastsupg: # @veryfastsupg # => This Inner Loop Header: Depth=2 xvld $xr3, $t0, -32 xvld $xr4, $t0, 0 - xvfmadd.d $xr3, $xr3, $xr2, $xr1 - xvfmadd.d $xr4, $xr4, $xr2, $xr1 + xvfmadd.d $xr3, $xr3, $xr1, $xr2 + xvfmadd.d $xr4, $xr4, $xr1, $xr2 xvftintrz.l.d $xr3, $xr3 xvpermi.d $xr5, $xr3, 238 xvpickev.w $xr3, $xr5, $xr3 @@ -11329,8 +11328,7 @@ counteff_simple_float: # @counteff_simple_float addi.d $a2, $a0, 32 bstrpick.d $a1, $fp, 30, 3 slli.d $a1, $a1, 3 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 move $a3, $a1 .p2align 4, , 16 .LBB35_3: # %vector.body @@ -11577,8 +11575,7 @@ counteff_simple: # @counteff_simple addi.d $a2, $a0, 32 bstrpick.d $a1, $fp, 30, 3 slli.d $a1, $a1, 3 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 move $a3, $a1 .p2align 4, , 16 .LBB36_3: # %vector.body @@ -11978,8 +11975,7 @@ counteff: # @counteff # %bb.25: # %vector.ph slli.d $a1, $a0, 3 addi.d $a2, $sp, 48 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 move $a3, $a1 .p2align 4, , 16 .LBB37_26: # %vector.body @@ -12859,7 +12855,7 @@ treeconstruction: # @treeconstruction slli.d $a1, $a1, 3 ori $a2, $zero, 8 lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 b .LBB44_18 .p2align 4, , 16 .LBB44_17: # %._crit_edge.us diff --git 
index ad05b0fd..d8740736 100644
--- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s
+++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s
@@ -1447,8 +1447,7 @@ main: # @main
 bstrpick.d $a1, $s7, 30, 3
 slli.d $a1, $a1, 3
 addi.d $a2, $a0, 32
- lu52i.d $a3, $zero, 1023
- xvreplgr2vr.d $xr0, $a3
+ xvldi $xr0, -912
 move $a3, $a1
 .p2align 4, , 16
 .LBB2_52: # %vector.body
diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s
index f94e3014..ee3ed92b 100644
--- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s
+++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s
@@ -2370,17 +2370,16 @@ partQ__align: # @partQ__align
 ori $a6, $zero, 1
 move $a4, $a3
 bstrins.d $a4, $a6, 2, 0
- xvreplgr2vr.w $xr0, $a1
 pcalau12i $a6, %pc_hi20(.LCPI4_1)
- xvld $xr1, $a6, %pc_lo12(.LCPI4_1)
+ xvld $xr0, $a6, %pc_lo12(.LCPI4_1)
+ xvreplgr2vr.w $xr1, $a1
 addi.d $a6, $t5, 4
- lu52i.d $a7, $zero, -1026
- xvreplgr2vr.d $xr2, $a7
+ xvldi $xr2, -800
 move $a7, $a5
 .p2align 4, , 16
 .LBB4_63: # %vector.body1166
 # =>This Inner Loop Header: Depth=1
- xvmul.w $xr3, $xr0, $xr1
+ xvmul.w $xr3, $xr1, $xr0
 xvpermi.q $xr4, $xr3, 1
 vext2xv.d.w $xr4, $xr4
 xvffint.d.l $xr4, $xr4
@@ -2438,7 +2437,7 @@ partQ__align: # @partQ__align
 fcvt.s.d $fa3, $fa3
 xvinsve0.w $xr6, $xr3, 7
 xvst $xr6, $a6, 0
- xvaddi.wu $xr1, $xr1, 8
+ xvaddi.wu $xr0, $xr0, 8
 addi.d $a7, $a7, -8
 addi.d $a6, $a6, 32
 bnez $a7, .LBB4_63
@@ -2482,18 +2481,17 @@ partQ__align: # @partQ__align
 ori $a6, $zero, 1
 move $a4, $a3
 bstrins.d $a4, $a6, 2, 0
- xvreplgr2vr.w $xr0, $a1
 pcalau12i $a6, %pc_hi20(.LCPI4_1)
- xvld $xr1, $a6, %pc_lo12(.LCPI4_1)
+ xvld $xr0, $a6, %pc_lo12(.LCPI4_1)
+ xvreplgr2vr.w $xr1, $a1
 ld.d $a6, $sp, 408 # 8-byte Folded Reload
 addi.d $a6, $a6, 4
- lu52i.d $a7, $zero, -1026
- xvreplgr2vr.d $xr2, $a7
+ xvldi $xr2, -800
 move $a7, $a5
 .p2align 4, , 16
 .LBB4_70: # %vector.body1180
 # =>This Inner Loop Header: Depth=1
- xvmul.w $xr3, $xr0, $xr1
+ xvmul.w $xr3, $xr1, $xr0
 xvpermi.q $xr4, $xr3, 1
 vext2xv.d.w $xr4, $xr4
 xvffint.d.l $xr4, $xr4
@@ -2551,7 +2549,7 @@ partQ__align: # @partQ__align
 fcvt.s.d $fa3, $fa3
 xvinsve0.w $xr6, $xr3, 7
 xvst $xr6, $a6, 0
- xvaddi.wu $xr1, $xr1, 8
+ xvaddi.wu $xr0, $xr0, 8
 addi.d $a7, $a7, -8
 addi.d $a6, $a6, 32
 bnez $a7, .LBB4_70
@@ -2770,12 +2768,9 @@ partQ__align: # @partQ__align
 bstrpick.d $t2, $t3, 31, 3
 slli.d $t2, $t2, 3
 xvreplve0.d $xr1, $xr0
- lu52i.d $t3, $zero, 1023
- xvreplgr2vr.d $xr2, $t3
- lu52i.d $t3, $zero, 1022
- xvreplgr2vr.d $xr3, $t3
- lu12i.w $t3, 260096
- xvreplgr2vr.w $xr4, $t3
+ xvldi $xr2, -912
+ xvldi $xr3, -928
+ xvldi $xr4, -1424
 move $t3, $a0
 move $t4, $a1
 move $t5, $a2
@@ -3173,12 +3168,9 @@ partQ__align: # @partQ__align
 bstrpick.d $t2, $t3, 31, 3
 slli.d $t2, $t2, 3
 xvreplve0.d $xr1, $xr0
- lu52i.d $t3, $zero, 1023
- xvreplgr2vr.d $xr2, $t3
- lu52i.d $t3, $zero, 1022
- xvreplgr2vr.d $xr3, $t3
- lu12i.w $t3, 260096
- xvreplgr2vr.w $xr4, $t3
+ xvldi $xr2, -912
+ xvldi $xr3, -928
+ xvldi $xr4, -1424
 move $t3, $a0
 move $t4, $a1
 move $t5, $a2
@@ -3919,21 +3911,20 @@ partQ__align: # @partQ__align
 move $a3, $a2
 bstrins.d $a3, $a5, 2, 0
 xvreplgr2vr.w $xr0, $a0
- ld.d $a5, $sp, 48 # 8-byte Folded Reload
- xvreplgr2vr.d $xr1, $a5
 pcalau12i $a5, %pc_hi20(.LCPI4_3)
- xvld $xr2, $a5, %pc_lo12(.LCPI4_3)
+ xvld $xr1, $a5, %pc_lo12(.LCPI4_3)
 pcalau12i $a5, %pc_hi20(.LCPI4_4)
- xvld $xr3, $a5, %pc_lo12(.LCPI4_4)
+ xvld $xr2, $a5, %pc_lo12(.LCPI4_4)
+ ld.d $a5, $sp, 48 # 8-byte Folded Reload
+ xvreplgr2vr.d $xr3, $a5
 addi.d $a5, $t5, 4
- lu52i.d $a6, $zero, -1026
- xvreplgr2vr.d $xr4, $a6
+ xvldi $xr4, -800
 move $a6, $a4
 .p2align 4, , 16
 .LBB4_194: # %vector.body1303
 # =>This Inner Loop Header: Depth=1
- xvsub.d $xr5, $xr1, $xr2
- xvsub.d $xr6, $xr1, $xr3
+ xvsub.d $xr5, $xr3, $xr1
+ xvsub.d $xr6, $xr3, $xr2
 xvpickve2gr.d $a7, $xr6, 0
 xvinsgr2vr.w $xr7, $a7, 0
 xvpickve2gr.d $a7, $xr6, 1
@@ -4008,8 +3999,8 @@ partQ__align: # @partQ__align
 fcvt.s.d $fa5, $fa5
 xvinsve0.w $xr8, $xr5, 7
 xvst $xr8, $a5, 0
- xvaddi.du $xr3, $xr3, 8
 xvaddi.du $xr2, $xr2, 8
+ xvaddi.du $xr1, $xr1, 8
 addi.d $a6, $a6, -8
 addi.d $a5, $a5, 32
 bnez $a6, .LBB4_194
@@ -4064,17 +4055,16 @@ partQ__align: # @partQ__align
 move $a0, $a3
 bstrins.d $a0, $a5, 1, 0
 xvreplve0.d $xr2, $xr0
- xvreplve0.d $xr3, $xr1
 pcalau12i $a5, %pc_hi20(.LCPI4_5)
- vld $vr4, $a5, %pc_lo12(.LCPI4_5)
+ vld $vr3, $a5, %pc_lo12(.LCPI4_5)
+ xvreplve0.d $xr4, $xr1
 addi.d $a5, $a1, 4
- lu52i.d $a6, $zero, -1026
- xvreplgr2vr.d $xr5, $a6
+ xvldi $xr5, -800
 move $a6, $a4
 .p2align 4, , 16
 .LBB4_201: # %vector.body1322
 # =>This Inner Loop Header: Depth=1
- vext2xv.du.wu $xr6, $xr4
+ vext2xv.du.wu $xr6, $xr3
 vld $vr7, $a5, 0
 xvffint.d.lu $xr6, $xr6
 xvfmul.d $xr6, $xr6, $xr5
@@ -4090,7 +4080,7 @@ partQ__align: # @partQ__align
 vreplvei.w $vr7, $vr7, 3
 fcvt.d.s $fa7, $fa7
 xvinsve0.d $xr9, $xr7, 3
- xvfmadd.d $xr6, $xr3, $xr6, $xr9
+ xvfmadd.d $xr6, $xr4, $xr6, $xr9
 xvpickve.d $xr7, $xr6, 1
 fcvt.s.d $fa7, $fa7
 xvpickve.d $xr8, $xr6, 0
@@ -4103,7 +4093,7 @@ partQ__align: # @partQ__align
 fcvt.s.d $fa6, $fa6
 vextrins.w $vr8, $vr6, 48
 vst $vr8, $a5, 0
- vaddi.wu $vr4, $vr4, 4
+ vaddi.wu $vr3, $vr3, 4
 addi.d $a6, $a6, -4
 addi.d $a5, $a5, 16
 bnez $a6, .LBB4_201
diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s
index 3a642782..54363b78 100644
--- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s
+++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s
@@ -1851,17 +1851,16 @@ partA__align: # @partA__align
 ori $a5, $zero, 1
 move $a3, $a2
 bstrins.d $a3, $a5, 2, 0
- xvreplgr2vr.w $xr0, $a0
 pcalau12i $a5, %pc_hi20(.LCPI4_1)
- xvld $xr1, $a5, %pc_lo12(.LCPI4_1)
+ xvld $xr0, $a5, %pc_lo12(.LCPI4_1)
+ xvreplgr2vr.w $xr1, $a0
 addi.d $a5, $t3, 4
- lu52i.d $a6, $zero, -1026
- xvreplgr2vr.d $xr2, $a6
+ xvldi $xr2, -800
 move $a6, $a4
 .p2align 4, , 16
 .LBB4_67: # %vector.body699
 # =>This Inner Loop Header: Depth=1
- xvmul.w $xr3, $xr0, $xr1
+ xvmul.w $xr3, $xr1, $xr0
 xvpermi.q $xr4, $xr3, 1
 vext2xv.d.w $xr4, $xr4
 xvffint.d.l $xr4, $xr4
@@ -1919,7 +1918,7 @@ partA__align: # @partA__align
 fcvt.s.d $fa3, $fa3
 xvinsve0.w $xr6, $xr3, 7
 xvst $xr6, $a5, 0
- xvaddi.wu $xr1, $xr1, 8
+ xvaddi.wu $xr0, $xr0, 8
 addi.d $a6, $a6, -8
 addi.d $a5, $a5, 32
 bnez $a6, .LBB4_67
@@ -1963,18 +1962,17 @@ partA__align: # @partA__align
 ori $a5, $zero, 1
 move $a3, $a2
 bstrins.d $a3, $a5, 2, 0
- xvreplgr2vr.w $xr0, $a0
 pcalau12i $a5, %pc_hi20(.LCPI4_1)
- xvld $xr1, $a5, %pc_lo12(.LCPI4_1)
+ xvld $xr0, $a5, %pc_lo12(.LCPI4_1)
+ xvreplgr2vr.w $xr1, $a0
 ld.d $a5, $sp, 280 # 8-byte Folded Reload
 addi.d $a5, $a5, 4
- lu52i.d $a6, $zero, -1026
- xvreplgr2vr.d $xr2, $a6
+ xvldi $xr2, -800
 move $a6, $a4
 .p2align 4, , 16
 .LBB4_74: # %vector.body713
 # =>This Inner Loop Header: Depth=1
- xvmul.w $xr3, $xr0, $xr1
+ xvmul.w $xr3, $xr1, $xr0
 xvpermi.q $xr4, $xr3, 1
 vext2xv.d.w $xr4, $xr4
 xvffint.d.l $xr4, $xr4
@@ -2032,7 +2030,7 @@ partA__align: # @partA__align
 fcvt.s.d $fa3, $fa3
 xvinsve0.w $xr6, $xr3, 7
 xvst $xr6, $a5, 0
- xvaddi.wu $xr1, $xr1, 8
+ xvaddi.wu $xr0, $xr0, 8
 addi.d $a6, $a6, -8
 addi.d $a5, $a5, 32
 bnez $a6, .LBB4_74
@@ -2368,20 +2366,19 @@ partA__align: # @partA__align
 move $a3, $a2
 bstrins.d $a3, $a5, 2, 0
 xvreplgr2vr.w $xr0, $a0
- xvreplgr2vr.d $xr1, $t4
 pcalau12i $a5, %pc_hi20(.LCPI4_2)
- xvld $xr2, $a5, %pc_lo12(.LCPI4_2)
+ xvld $xr1, $a5, %pc_lo12(.LCPI4_2)
 pcalau12i $a5, %pc_hi20(.LCPI4_3)
- xvld $xr3, $a5, %pc_lo12(.LCPI4_3)
+ xvld $xr2, $a5, %pc_lo12(.LCPI4_3)
+ xvreplgr2vr.d $xr3, $t4
 addi.d $a5, $t3, 4
- lu52i.d $a6, $zero, -1026
- xvreplgr2vr.d $xr4, $a6
+ xvldi $xr4, -800
 move $a6, $a4
 .p2align 4, , 16
 .LBB4_109: # %vector.body873
 # =>This Inner Loop Header: Depth=1
- xvsub.d $xr5, $xr1, $xr2
- xvsub.d $xr6, $xr1, $xr3
+ xvsub.d $xr5, $xr3, $xr1
+ xvsub.d $xr6, $xr3, $xr2
 xvpickve2gr.d $a7, $xr6, 0
 xvinsgr2vr.w $xr7, $a7, 0
 xvpickve2gr.d $a7, $xr6, 1
@@ -2456,8 +2453,8 @@ partA__align: # @partA__align
 fcvt.s.d $fa5, $fa5
 xvinsve0.w $xr8, $xr5, 7
 xvst $xr8, $a5, 0
- xvaddi.du $xr3, $xr3, 8
 xvaddi.du $xr2, $xr2, 8
+ xvaddi.du $xr1, $xr1, 8
 addi.d $a6, $a6, -8
 addi.d $a5, $a5, 32
 bnez $a6, .LBB4_109
@@ -2510,17 +2507,16 @@ partA__align: # @partA__align
 move $a0, $a2
 bstrins.d $a0, $a4, 1, 0
 xvreplve0.d $xr2, $xr0
- xvreplve0.d $xr3, $xr1
 pcalau12i $a4, %pc_hi20(.LCPI4_4)
- vld $vr4, $a4, %pc_lo12(.LCPI4_4)
+ vld $vr3, $a4, %pc_lo12(.LCPI4_4)
+ xvreplve0.d $xr4, $xr1
 addi.d $a4, $s4, 4
- lu52i.d $a5, $zero, -1026
- xvreplgr2vr.d $xr5, $a5
+ xvldi $xr5, -800
 move $a5, $a3
 .p2align 4, , 16
 .LBB4_116: # %vector.body892
 # =>This Inner Loop Header: Depth=1
- vext2xv.du.wu $xr6, $xr4
+ vext2xv.du.wu $xr6, $xr3
 vld $vr7, $a4, 0
 xvffint.d.lu $xr6, $xr6
 xvfmul.d $xr6, $xr6, $xr5
@@ -2536,7 +2532,7 @@ partA__align: # @partA__align
 vreplvei.w $vr7, $vr7, 3
 fcvt.d.s $fa7, $fa7
 xvinsve0.d $xr9, $xr7, 3
- xvfmadd.d $xr6, $xr3, $xr6, $xr9
+ xvfmadd.d $xr6, $xr4, $xr6, $xr9
 xvpickve.d $xr7, $xr6, 1
 fcvt.s.d $fa7, $fa7
 xvpickve.d $xr8, $xr6, 0
@@ -2549,7 +2545,7 @@ partA__align: # @partA__align
 fcvt.s.d $fa6, $fa6
 vextrins.w $vr8, $vr6, 48
 vst $vr8, $a4, 0
- vaddi.wu $vr4, $vr4, 4
+ vaddi.wu $vr3, $vr3, 4
 addi.d $a5, $a5, -4
 addi.d $a4, $a4, 16
 bnez $a5, .LBB4_116
@@ -2805,10 +2801,8 @@ partA__align: # @partA__align
 bstrpick.d $a3, $a7, 30, 3
 slli.d $a3, $a3, 3
 xvreplve0.d $xr1, $xr0
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr2, $a4
- lu52i.d $a4, $zero, 1022
- xvreplgr2vr.d $xr3, $a4
+ xvldi $xr2, -912
+ xvldi $xr3, -928
 move $a4, $a0
 move $a5, $a2
 move $a6, $a3
@@ -2934,10 +2928,8 @@ partA__align: # @partA__align
 bstrpick.d $a3, $t0, 30, 3
 slli.d $a3, $a3, 3
 xvreplve0.d $xr1, $xr0
- lu52i.d $a4, $zero, 1023
- xvreplgr2vr.d $xr2, $a4
- lu52i.d $a4, $zero, 1022
- xvreplgr2vr.d $xr3, $a4
+ xvldi $xr2, -912
+ xvldi $xr3, -928
 move $a4, $a0
 move $a5, $a2
 move $a6, $a3
diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s
index f3463c56..64a1cc46 100644
--- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s
+++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s
@@ -634,8 +634,8 @@ foldrna: # @foldrna
 lu12i.w $t0, 260096
 ori $t1, $zero, 64
 ori $t2, $zero, 16
- vreplgr2vr.w $vr1, $t0
- xvreplgr2vr.w $xr2, $t0
+ vldi $vr1, -1424
+ xvldi $xr2, -1424
 b .LBB1_56
 .p2align 4, , 16
 .LBB1_55: # %._crit_edge352.us
diff --git a/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/long_term.s b/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/long_term.s
index 55b9bbd2..c6ff93fe 100644
--- a/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/long_term.s
+++ b/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/long_term.s
@@ -39,26 +39,26 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 move $s4, $a1
 vld $vr0, $a1, 16
 vslti.h $vr1, $vr0, 0
- lu12i.w $a0, 8
- vreplgr2vr.h $vr2, $a0
+ vldi $vr2, -2688
 vseq.h $vr3, $vr0, $vr2
 vneg.h $vr4, $vr0
 lu12i.w $a0, 7
- ori $a1, $a0, 4095
- vld $vr5, $s4, 48
- vreplgr2vr.h $vr6, $a1
+ ori $a0, $a0, 4095
+ vld $vr5, $a1, 48
+ st.d $a0, $sp, 8 # 8-byte Folded Spill
+ vreplgr2vr.h $vr6, $a0
 vbitsel.v $vr3, $vr4, $vr6, $vr3
 vbitsel.v $vr0, $vr0, $vr3, $vr1
 vslti.h $vr1, $vr5, 0
 vseq.h $vr3, $vr5, $vr2
 vneg.h $vr4, $vr5
- vld $vr7, $s4, 0
+ vld $vr7, $a1, 0
 vbitsel.v $vr3, $vr4, $vr6, $vr3
 vbitsel.v $vr1, $vr5, $vr3, $vr1
 vmax.h $vr0, $vr0, $vr1
 vslti.h $vr1, $vr7, 0
 vseq.h $vr3, $vr7, $vr2
- vld $vr4, $s4, 32
+ vld $vr4, $a1, 32
 vneg.h $vr5, $vr7
 vbitsel.v $vr3, $vr5, $vr6, $vr3
 vbitsel.v $vr1, $vr7, $vr3, $vr1
@@ -75,7 +75,7 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vbsrl.v $vr1, $vr0, 4
 vmax.h $vr0, $vr1, $vr0
 vbsrl.v $vr1, $vr0, 2
- vld $vr3, $s4, 64
+ vld $vr3, $a1, 64
 vmax.h $vr0, $vr1, $vr0
 vpickve2gr.h $a0, $vr0, 0
 vreplgr2vr.h $vr0, $a0
@@ -97,7 +97,6 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 st.d $a4, $sp, 48 # 8-byte Folded Spill
 st.d $a3, $sp, 56 # 8-byte Folded Spill
 move $s5, $a2
- st.d $a1, $sp, 8 # 8-byte Folded Spill
 st.d $a5, $sp, 32 # 8-byte Folded Spill
 beqz $a0, .LBB0_2
 # %bb.1:
@@ -587,10 +586,8 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
- ld.d $a1, $sp, 8 # 8-byte Folded Reload
- xvreplgr2vr.w $xr1, $a1
+ xvldi $xr0, -3776
+ xvldi $xr1, -2433
 xvori.b $xr3, $xr0, 0
 xvmadd.w $xr3, $xr2, $xr1
 xvsrli.w $xr2, $xr3, 15
@@ -872,10 +869,9 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
 ori $a1, $zero, 3277
 .LBB0_22: # %Long_term_analysis_filtering.exit
+ xvldi $xr0, -3776
 xvreplgr2vr.w $xr1, $a1
 xvori.b $xr3, $xr0, 0
 xvmadd.w $xr3, $xr2, $xr1
@@ -1236,8 +1232,6 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
 lu12i.w $a1, 2
 ori $a1, $a1, 3277
 b .LBB0_22
@@ -1329,8 +1323,6 @@ Gsm_Long_Term_Predictor: # @Gsm_Long_Term_Predictor
 vpickve2gr.h $a1, $vr0, 7
 ext.w.h $a1, $a1
 xvinsgr2vr.w $xr2, $a1, 7
- lu12i.w $a1, 4
- xvreplgr2vr.w $xr0, $a1
 lu12i.w $a1, 5
 ori $a1, $a1, 819
 b .LBB0_22
diff --git a/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/lpc.s b/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/lpc.s
index 4605df4f..da60a3aa 100644
--- a/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/lpc.s
+++ b/results/MultiSource/Benchmarks/mediabench/gsm/toast/CMakeFiles/toast.dir/lpc.s
@@ -34,15 +34,14 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 xvld $xr2, $a1, 32
 xvslti.h $xr3, $xr1, 0
 xvslti.h $xr4, $xr2, 0
- lu12i.w $a0, 8
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2688
 xvseq.h $xr5, $xr1, $xr0
 xvseq.h $xr6, $xr2, $xr0
 xvneg.h $xr7, $xr1
 xvneg.h $xr8, $xr2
 lu12i.w $a0, 7
- ori $s4, $a0, 4095
- xvreplgr2vr.h $xr9, $s4
+ ori $s3, $a0, 4095
+ xvreplgr2vr.h $xr9, $s3
 xvbitsel.v $xr5, $xr7, $xr9, $xr5
 xvbitsel.v $xr6, $xr8, $xr9, $xr6
 xvld $xr7, $a1, 64
@@ -138,8 +137,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 jr $a0
 .LBB0_4: # %vector.body108.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_5: # %vector.body108
@@ -177,8 +175,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 b .LBB0_14
 .LBB0_8: # %vector.body124.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_9: # %vector.body124
@@ -209,8 +206,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 b .LBB0_14
 .LBB0_10: # %vector.body132.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_11: # %vector.body132
@@ -241,8 +237,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 b .LBB0_14
 .LBB0_12: # %vector.body116.preheader
 move $a0, $zero
- lu12i.w $a3, 4
- vreplgr2vr.w $vr0, $a3
+ vldi $vr0, -3776
 ori $a3, $zero, 320
 .p2align 4, , 16
 .LBB0_13: # %vector.body116
@@ -718,6 +713,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 vpackev.d $vr16, $vr16, $vr17
 vst $vr16, $s0, 304
 .LBB0_18: # %Autocorrelation.exit
+ lu12i.w $ra, 8
 st.d $s2, $sp, 16 # 8-byte Folded Spill
 beqz $s1, .LBB0_21
 # %bb.19: # %.preheader69.preheader.i
@@ -748,7 +744,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 slli.d $fp, $a7, 1
 slli.d $s0, $a6, 1
 slli.d $s2, $a5, 1
- slli.d $s3, $a4, 1
+ slli.d $s4, $a4, 1
 slli.d $s6, $a3, 1
 slli.d $s7, $a2, 1
 slli.d $s8, $a1, 1
@@ -769,7 +765,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 srli.d $a6, $a6, 16
 sll.d $a7, $s6, $a0
 srli.d $a7, $a7, 16
- sll.d $t0, $s3, $a0
+ sll.d $t0, $s4, $a0
 srli.d $t0, $t0, 16
 sll.d $t1, $s2, $a0
 ld.d $t3, $sp, 16 # 8-byte Folded Reload
@@ -799,7 +795,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 sltui $a2, $a2, 1
 sub.d $a4, $zero, $s5
 masknez $a4, $a4, $a2
- maskeqz $a2, $s4, $a2
+ maskeqz $a2, $s3, $a2
 or $a2, $a2, $a4
 masknez $a4, $s5, $a0
 maskeqz $a0, $a2, $a0
@@ -813,6 +809,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 move $a1, $zero
 pcaddu18i $ra, %call36(memset)
 jirl $ra, $ra, 0
+ lu12i.w $ra, 8
 b .LBB0_22
 .LBB0_21: # %.preheader.preheader.i
 vrepli.b $vr0, 0
@@ -822,12 +819,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $a1, $s1, 0
 ext.w.h $a0, $a1
 slti $a0, $a0, 0
- lu12i.w $fp, 8
- xor $a2, $a1, $fp
+ xor $a2, $a1, $ra
 sltui $a2, $a2, 1
 sub.d $a3, $zero, $a1
 masknez $a3, $a3, $a2
- maskeqz $a2, $s4, $a2
+ maskeqz $a2, $s3, $a2
 or $a2, $a2, $a3
 maskeqz $a2, $a2, $a0
 masknez $a1, $a1, $a0
@@ -853,9 +849,9 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 .LBB0_26: # %.lr.ph.preheader
 move $s6, $zero
 ori $s7, $zero, 8
- ori $s3, $zero, 0
- lu32i.d $s3, 32768
- lu12i.w $fp, -8
+ ori $fp, $zero, 0
+ lu32i.d $fp, 32768
+ lu12i.w $s4, -8
 ori $s8, $zero, 7
 move $s0, $t3
 ori $a2, $zero, 1
@@ -865,16 +861,16 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 # in Loop: Header=BB0_28 Depth=1
 mul.d $a0, $a1, $a0
 slli.d $a0, $a0, 33
- add.d $a0, $a0, $s3
+ add.d $a0, $a0, $fp
 srai.d $a0, $a0, 48
 add.d $a0, $a0, $s1
- slt $a1, $a0, $s4
+ slt $a1, $a0, $s3
 maskeqz $a0, $a0, $a1
- masknez $a1, $s4, $a1
+ masknez $a1, $s3, $a1
 or $a0, $a0, $a1
- slt $a1, $fp, $a0
+ slt $a1, $s4, $a0
 maskeqz $a0, $a0, $a1
- masknez $a1, $fp, $a1
+ masknez $a1, $s4, $a1
 or $a1, $a0, $a1
 addi.d $a2, $s2, 1
 addi.d $s0, $s0, 2
@@ -882,12 +878,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ext.w.h $a0, $s5
 slti $a0, $a0, 0
 bstrpick.d $a3, $s5, 15, 0
- lu12i.w $a4, 8
- xor $a3, $a3, $a4
+ xor $a3, $a3, $ra
 sltui $a3, $a3, 1
 sub.d $a4, $zero, $s5
 masknez $a4, $a4, $a3
- maskeqz $a3, $s4, $a3
+ maskeqz $a3, $s3, $a3
 or $a3, $a3, $a4
 maskeqz $a3, $a3, $a0
 masknez $a0, $s5, $a0
@@ -905,6 +900,7 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 move $a1, $s1
 pcaddu18i $ra, %call36(gsm_div)
 jirl $ra, $ra, 0
+ lu12i.w $ra, 8
 ext.w.h $a1, $s5
 slt $a2, $zero, $a1
 sub.d $a3, $zero, $a0
@@ -933,29 +929,29 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.h $a6, $a4, 0
 ld.h $a7, $a5, 0
 mul.d $t0, $a3, $a6
- add.d $t0, $t0, $s3
+ add.d $t0, $t0, $fp
 srai.d $t0, $t0, 48
 add.d $t0, $t0, $a7
- slt $t1, $t0, $s4
+ slt $t1, $t0, $s3
 maskeqz $t0, $t0, $t1
- masknez $t1, $s4, $t1
+ masknez $t1, $s3, $t1
 or $t0, $t0, $t1
- slt $t1, $fp, $t0
+ slt $t1, $s4, $t0
 maskeqz $t0, $t0, $t1
- masknez $t1, $fp, $t1
+ masknez $t1, $s4, $t1
 or $t0, $t0, $t1
 st.h $t0, $a5, -2
 mul.d $a7, $a3, $a7
- add.d $a7, $a7, $s3
+ add.d $a7, $a7, $fp
 srai.d $a7, $a7, 48
 add.d $a6, $a7, $a6
- slt $a7, $a6, $s4
+ slt $a7, $a6, $s3
 maskeqz $a6, $a6, $a7
- masknez $a7, $s4, $a7
+ masknez $a7, $s3, $a7
 or $a6, $a6, $a7
- slt $a7, $fp, $a6
+ slt $a7, $s4, $a6
 maskeqz $a6, $a6, $a7
- masknez $a7, $fp, $a7
+ masknez $a7, $s4, $a7
 or $a6, $a6, $a7
 st.h $a6, $a4, 0
 addi.d $a2, $a2, -1
@@ -974,11 +970,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $a4, $s1, 2
 ext.w.h $a2, $a4
 slti $a2, $a2, 0
- xor $a5, $a4, $fp
+ xor $a5, $a4, $ra
 sltui $a5, $a5, 1
 sub.d $a6, $zero, $a4
 masknez $a6, $a6, $a5
- maskeqz $a5, $s4, $a5
+ maskeqz $a5, $s3, $a5
 or $a5, $a5, $a6
 maskeqz $a5, $a5, $a2
 masknez $a4, $a4, $a2
@@ -1006,11 +1002,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $a6, $s1, 4
 ext.w.h $a4, $a6
 slti $a4, $a4, 0
- xor $a7, $a6, $fp
+ xor $a7, $a6, $ra
 sltui $a7, $a7, 1
 sub.d $t0, $zero, $a6
 masknez $t0, $t0, $a7
- maskeqz $a7, $s4, $a7
+ maskeqz $a7, $s3, $a7
 or $a7, $a7, $t0
 maskeqz $a7, $a7, $a4
 masknez $a6, $a6, $a4
@@ -1037,11 +1033,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t0, $s1, 6
 ext.w.h $a7, $t0
 slti $a7, $a7, 0
- xor $t1, $t0, $fp
+ xor $t1, $t0, $ra
 sltui $t1, $t1, 1
 sub.d $t2, $zero, $t0
 masknez $t2, $t2, $t1
- maskeqz $t1, $s4, $t1
+ maskeqz $t1, $s3, $t1
 or $t1, $t1, $t2
 maskeqz $t1, $t1, $a7
 masknez $t0, $t0, $a7
@@ -1068,11 +1064,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t2, $s1, 8
 ext.w.h $t1, $t2
 slti $t1, $t1, 0
- xor $t3, $t2, $fp
+ xor $t3, $t2, $ra
 sltui $t3, $t3, 1
 sub.d $t4, $zero, $t2
 masknez $t4, $t4, $t3
- maskeqz $t3, $s4, $t3
+ maskeqz $t3, $s3, $t3
 or $t3, $t3, $t4
 maskeqz $t3, $t3, $t1
 masknez $t2, $t2, $t1
@@ -1099,11 +1095,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t4, $s1, 10
 ext.w.h $t3, $t4
 slti $t3, $t3, 0
- xor $t5, $t4, $fp
+ xor $t5, $t4, $ra
 sltui $t5, $t5, 1
 sub.d $t6, $zero, $t4
 masknez $t6, $t6, $t5
- maskeqz $t5, $s4, $t5
+ maskeqz $t5, $s3, $t5
 or $t5, $t5, $t6
 maskeqz $t5, $t5, $t3
 masknez $t4, $t4, $t3
@@ -1130,11 +1126,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t6, $s1, 12
 ext.w.h $t5, $t6
 slti $t5, $t5, 0
- xor $t7, $t6, $fp
+ xor $t7, $t6, $ra
 sltui $t7, $t7, 1
 sub.d $t8, $zero, $t6
 masknez $t8, $t8, $t7
- maskeqz $t7, $s4, $t7
+ maskeqz $t7, $s3, $t7
 or $t7, $t7, $t8
 maskeqz $t7, $t7, $t5
 masknez $t6, $t6, $t5
@@ -1161,11 +1157,11 @@ Gsm_LPC_Analysis: # @Gsm_LPC_Analysis
 ld.hu $t8, $s1, 14
 ext.w.h $t6, $t8
 slti $t6, $t6, 0
- xor $fp, $t8, $fp
+ xor $fp, $t8, $ra
 sltui $fp, $fp, 1
 sub.d $s0, $zero, $t8
 masknez $s0, $s0, $fp
- maskeqz $fp, $s4, $fp
+ maskeqz $fp, $s3, $fp
 or $fp, $fp, $s0
 maskeqz $fp, $fp, $t6
 masknez $t8, $t8, $t6
diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s
index a5eedc48..297b11fd 100644
--- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s
+++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s
@@ -196,8 +196,7 @@ start_pass_fdctmgr: # @start_pass_fdctmgr
 fld.d $fs1, $a0, %pc_lo12(.LCPI1_1)
 pcalau12i $a0, %pc_hi20(.LCPI1_2)
 fld.d $fs2, $a0, %pc_lo12(.LCPI1_2)
- lu52i.d $a0, $zero, 1026
- xvreplgr2vr.d $xr6, $a0
+ xvldi $xr6, -992
 pcalau12i $a0, %pc_hi20(.LCPI1_3)
 fld.d $fs3, $a0, %pc_lo12(.LCPI1_3)
 pcalau12i $a0, %pc_hi20(.LCPI1_4)
@@ -205,8 +204,7 @@ start_pass_fdctmgr: # @start_pass_fdctmgr
 pcalau12i $a0, %pc_hi20(.LCPI1_5)
 fld.d $fs5, $a0, %pc_lo12(.LCPI1_5)
 ori $s8, $zero, 64
- ori $a0, $zero, 1024
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3836
 xvst $xr0, $sp, 32 # 32-byte Folded Spill
 xvst $xr6, $sp, 96 # 32-byte Folded Spill
 b .LBB1_4
@@ -1440,14 +1438,14 @@ forward_DCT_float: # @forward_DCT_float
 # %bb.0:
 beqz $a6, .LBB3_4
 # %bb.1: # %.preheader.preheader
- addi.d $sp, $sp, -384
- st.d $ra, $sp, 376 # 8-byte Folded Spill
- st.d $fp, $sp, 368 # 8-byte Folded Spill
- st.d $s0, $sp, 360 # 8-byte Folded Spill
- st.d $s1, $sp, 352 # 8-byte Folded Spill
- st.d $s2, $sp, 344 # 8-byte Folded Spill
- st.d $s3, $sp, 336 # 8-byte Folded Spill
- st.d $s4, $sp, 328 # 8-byte Folded Spill
+ addi.d $sp, $sp, -368
+ st.d $ra, $sp, 360 # 8-byte Folded Spill
+ st.d $fp, $sp, 352 # 8-byte Folded Spill
+ st.d $s0, $sp, 344 # 8-byte Folded Spill
+ st.d $s1, $sp, 336 # 8-byte Folded Spill
+ st.d $s2, $sp, 328 # 8-byte Folded Spill
+ st.d $s3, $sp, 320 # 8-byte Folded Spill
+ st.d $s4, $sp, 312 # 8-byte Folded Spill
 move $fp, $a5
 ld.d $a0, $a0, 480
 ld.w $a1, $a1, 16
@@ -1461,10 +1459,7 @@ forward_DCT_float: # @forward_DCT_float
 lu12i.w $a0, 288768
 ori $a0, $a0, 256
 xvreplgr2vr.w $xr0, $a0
- xvst $xr0, $sp, 32 # 32-byte Folded Spill
- lu12i.w $a0, 12
- vreplgr2vr.h $vr0, $a0
- vst $vr0, $sp, 16 # 16-byte Folded Spill
+ xvst $xr0, $sp, 16 # 32-byte Folded Spill
 .p2align 4, , 16
 .LBB3_2: # %.preheader
 # =>This Inner Loop Header: Depth=1
@@ -1475,342 +1470,342 @@ forward_DCT_float: # @forward_DCT_float
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 72
+ fst.s $fa0, $sp, 56
 ld.bu $a2, $a1, 1
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 76
+ fst.s $fa0, $sp, 60
 ld.bu $a2, $a1, 2
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 80
+ fst.s $fa0, $sp, 64
 ld.bu $a2, $a1, 3
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 84
+ fst.s $fa0, $sp, 68
 ld.bu $a2, $a1, 4
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 88
+ fst.s $fa0, $sp, 72
 ld.bu $a2, $a1, 5
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 92
+ fst.s $fa0, $sp, 76
 ld.bu $a2, $a1, 6
 addi.d $a2, $a2, -128
 movgr2fr.w $fa0, $a2
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 96
+ fst.s $fa0, $sp, 80
 ld.bu $a1, $a1, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 8
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 100
+ fst.s $fa0, $sp, 84
 ldx.bu $a1, $a2, $a0
 add.d $a2, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 104
+ fst.s $fa0, $sp, 88
 ld.bu $a1, $a2, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 108
+ fst.s $fa0, $sp, 92
 ld.bu $a1, $a2, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 112
+ fst.s $fa0, $sp, 96
 ld.bu $a1, $a2, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 116
+ fst.s $fa0, $sp, 100
 ld.bu $a1, $a2, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 120
+ fst.s $fa0, $sp, 104
 ld.bu $a1, $a2, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 124
+ fst.s $fa0, $sp, 108
 ld.bu $a1, $a2, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 128
+ fst.s $fa0, $sp, 112
 ld.bu $a1, $a2, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 16
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 132
+ fst.s $fa0, $sp, 116
 ldx.bu $a1, $a2, $a0
 add.d $a2, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 136
+ fst.s $fa0, $sp, 120
 ld.bu $a1, $a2, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 140
+ fst.s $fa0, $sp, 124
 ld.bu $a1, $a2, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 144
+ fst.s $fa0, $sp, 128
 ld.bu $a1, $a2, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 148
+ fst.s $fa0, $sp, 132
 ld.bu $a1, $a2, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 152
+ fst.s $fa0, $sp, 136
 ld.bu $a1, $a2, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 156
+ fst.s $fa0, $sp, 140
 ld.bu $a1, $a2, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 160
+ fst.s $fa0, $sp, 144
 ld.bu $a1, $a2, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 24
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 164
+ fst.s $fa0, $sp, 148
 ldx.bu $a1, $a2, $a0
 add.d $a2, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 168
+ fst.s $fa0, $sp, 152
 ld.bu $a1, $a2, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 172
+ fst.s $fa0, $sp, 156
 ld.bu $a1, $a2, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 176
+ fst.s $fa0, $sp, 160
 ld.bu $a1, $a2, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 180
+ fst.s $fa0, $sp, 164
 ld.bu $a1, $a2, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 184
+ fst.s $fa0, $sp, 168
 ld.bu $a1, $a2, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 188
+ fst.s $fa0, $sp, 172
 ld.bu $a1, $a2, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 192
+ fst.s $fa0, $sp, 176
 ld.bu $a1, $a2, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 32
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 196
+ fst.s $fa0, $sp, 180
 ldx.bu $a1, $a2, $a0
 add.d $a2, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 200
+ fst.s $fa0, $sp, 184
 ld.bu $a1, $a2, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 204
+ fst.s $fa0, $sp, 188
 ld.bu $a1, $a2, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 208
+ fst.s $fa0, $sp, 192
 ld.bu $a1, $a2, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 212
+ fst.s $fa0, $sp, 196
 ld.bu $a1, $a2, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 216
+ fst.s $fa0, $sp, 200
 ld.bu $a1, $a2, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 220
+ fst.s $fa0, $sp, 204
 ld.bu $a1, $a2, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 224
+ fst.s $fa0, $sp, 208
 ld.bu $a1, $a2, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 40
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 228
+ fst.s $fa0, $sp, 212
 ldx.bu $a1, $a2, $a0
 add.d $a2, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 232
+ fst.s $fa0, $sp, 216
 ld.bu $a1, $a2, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 236
+ fst.s $fa0, $sp, 220
 ld.bu $a1, $a2, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 240
+ fst.s $fa0, $sp, 224
 ld.bu $a1, $a2, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 244
+ fst.s $fa0, $sp, 228
 ld.bu $a1, $a2, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 248
+ fst.s $fa0, $sp, 232
 ld.bu $a1, $a2, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 252
+ fst.s $fa0, $sp, 236
 ld.bu $a1, $a2, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 256
+ fst.s $fa0, $sp, 240
 ld.bu $a1, $a2, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 48
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 260
+ fst.s $fa0, $sp, 244
 ldx.bu $a1, $a2, $a0
 add.d $a2, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 264
+ fst.s $fa0, $sp, 248
 ld.bu $a1, $a2, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 268
+ fst.s $fa0, $sp, 252
 ld.bu $a1, $a2, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 272
+ fst.s $fa0, $sp, 256
 ld.bu $a1, $a2, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 276
+ fst.s $fa0, $sp, 260
 ld.bu $a1, $a2, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 280
+ fst.s $fa0, $sp, 264
 ld.bu $a1, $a2, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 284
+ fst.s $fa0, $sp, 268
 ld.bu $a1, $a2, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 288
+ fst.s $fa0, $sp, 272
 ld.bu $a1, $a2, 7
 addi.d $a1, $a1, -128
 ld.d $a2, $s2, 56
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 292
+ fst.s $fa0, $sp, 276
 ldx.bu $a1, $a2, $a0
 add.d $a0, $a2, $a0
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 296
+ fst.s $fa0, $sp, 280
 ld.bu $a1, $a0, 1
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 300
+ fst.s $fa0, $sp, 284
 ld.bu $a1, $a0, 2
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 304
+ fst.s $fa0, $sp, 288
 ld.bu $a1, $a0, 3
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 308
+ fst.s $fa0, $sp, 292
 ld.bu $a1, $a0, 4
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 312
+ fst.s $fa0, $sp, 296
 ld.bu $a1, $a0, 5
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 316
+ fst.s $fa0, $sp, 300
 ld.bu $a1, $a0, 6
 addi.d $a1, $a1, -128
 movgr2fr.w $fa0, $a1
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 320
+ fst.s $fa0, $sp, 304
 ld.bu $a0, $a0, 7
 addi.d $a0, $a0, -128
 movgr2fr.w $fa0, $a0
 ffint.s.w $fa0, $fa0
- fst.s $fa0, $sp, 324
- addi.d $a0, $sp, 72
+ fst.s $fa0, $sp, 308
+ addi.d $a0, $sp, 56
 jirl $ra, $s0, 0
- xvld $xr0, $sp, 72
+ xvld $xr0, $sp, 56
 xvld $xr1, $s1, 0
 xvfmul.s $xr0, $xr0, $xr1
- xvld $xr3, $sp, 32 # 32-byte Folded Reload
+ xvld $xr3, $sp, 16 # 32-byte Folded Reload
 xvfadd.s $xr0, $xr0, $xr3
 xvftintrz.w.s $xr0, $xr0
 xvpickve2gr.w $a0, $xr0, 0
@@ -1828,10 +1823,10 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 104
+ xvld $xr0, $sp, 88
 xvld $xr2, $s1, 32
 vinsgr2vr.h $vr1, $a0, 7
- vld $vr4, $sp, 16 # 16-byte Folded Reload
+ vldi $vr4, -2624
 vadd.h $vr1, $vr1, $vr4
 vst $vr1, $s4, -64
 xvfmul.s $xr0, $xr0, $xr2
@@ -1852,7 +1847,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 136
+ xvld $xr0, $sp, 120
 xvld $xr2, $s1, 64
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1875,7 +1870,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 168
+ xvld $xr0, $sp, 152
 xvld $xr2, $s1, 96
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1898,7 +1893,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 200
+ xvld $xr0, $sp, 184
 xvld $xr2, $s1, 128
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1921,7 +1916,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 232
+ xvld $xr0, $sp, 216
 xvld $xr2, $s1, 160
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1944,7 +1939,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 264
+ xvld $xr0, $sp, 248
 xvld $xr2, $s1, 192
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1967,7 +1962,7 @@ forward_DCT_float: # @forward_DCT_float
 xvpickve2gr.w $a0, $xr0, 6
 vinsgr2vr.h $vr1, $a0, 6
 xvpickve2gr.w $a0, $xr0, 7
- xvld $xr0, $sp, 296
+ xvld $xr0, $sp, 280
 xvld $xr2, $s1, 224
 vinsgr2vr.h $vr1, $a0, 7
 vadd.h $vr1, $vr1, $vr4
@@ -1998,14 +1993,14 @@ forward_DCT_float: # @forward_DCT_float
 addi.d $s4, $s4, 128
 bnez $s3, .LBB3_2
 # %bb.3:
- ld.d $s4, $sp, 328 # 8-byte Folded Reload
- ld.d $s3, $sp, 336 # 8-byte Folded Reload
- ld.d $s2, $sp, 344 # 8-byte Folded Reload
- ld.d $s1, $sp, 352 # 8-byte Folded Reload
- ld.d $s0, $sp, 360 # 8-byte Folded Reload
- ld.d $fp, $sp, 368 # 8-byte Folded Reload
- ld.d $ra, $sp, 376 # 8-byte Folded Reload
- addi.d $sp, $sp, 384
+ ld.d $s4, $sp, 312 # 8-byte Folded Reload
+ ld.d $s3, $sp, 320 # 8-byte Folded Reload
+ ld.d $s2, $sp, 328 # 8-byte Folded Reload
+ ld.d $s1, $sp, 336 # 8-byte Folded Reload
+ ld.d $s0, $sp, 344 # 8-byte Folded Reload
+ ld.d $fp, $sp, 352 # 8-byte Folded Reload
+ ld.d $ra, $sp, 360 # 8-byte Folded Reload
+ addi.d $sp, $sp, 368
 .LBB3_4: # %._crit_edge
 ret
 .Lfunc_end3:
diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s
index ca5bfa1e..293a54a9 100644
--- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s
+++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s
@@ -177,8 +177,6 @@ start_pass: # @start_pass
 pcalau12i $a0, %got_pc_hi20(jpeg_idct_1x1)
 ld.d $s3, $a0, %got_pc_lo12(jpeg_idct_1x1)
 ori $s4, $zero, 7
- vrepli.b $vr0, 0
- vst $vr0, $sp, 272 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_0)
 fld.d $fs0, $a0, %pc_lo12(.LCPI1_0)
 pcalau12i $a0, %pc_hi20(.LCPI1_1)
@@ -193,27 +191,28 @@ start_pass: # @start_pass
 fld.d $fs5, $a0, %pc_lo12(.LCPI1_5)
 pcalau12i $a0, %pc_hi20(.LCPI1_6)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_6)
- xvst $xr0, $sp, 240 # 32-byte Folded Spill
+ xvst $xr0, $sp, 256 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_7)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_7)
- xvst $xr0, $sp, 208 # 32-byte Folded Spill
+ xvst $xr0, $sp, 224 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_8)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_8)
- xvst $xr0, $sp, 176 # 32-byte Folded Spill
+ xvst $xr0, $sp, 192 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_9)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_9)
- xvst $xr0, $sp, 144 # 32-byte Folded Spill
+ xvst $xr0, $sp, 160 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_10)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_10)
- xvst $xr0, $sp, 112 # 32-byte Folded Spill
+ xvst $xr0, $sp, 128 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_11)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_11)
- xvst $xr0, $sp, 80 # 32-byte Folded Spill
+ xvst $xr0, $sp, 96 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI1_12)
 xvld $xr0, $a0, %pc_lo12(.LCPI1_12)
- xvst $xr0, $sp, 48 # 32-byte Folded Spill
- ori $a0, $zero, 2048
- xvreplgr2vr.w $xr0, $a0
+ xvst $xr0, $sp, 64 # 32-byte Folded Spill
+ vrepli.b $vr0, 0
+ vst $vr0, $sp, 48 # 16-byte Folded Spill
+ xvldi $xr0, -3832
 xvst $xr0, $sp, 16 # 32-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LJTI1_0)
 addi.d $s7, $a0, %pc_lo12(.LJTI1_0)
@@ -230,7 +229,7 @@ start_pass: # @start_pass
 ld.d $a5, $a3, 8
 vinsgr2vr.d $vr0, $a4, 0
 vinsgr2vr.d $vr1, $a5, 0
- vld $vr2, $sp, 272 # 16-byte Folded Reload
+ vld $vr2, $sp, 48 # 16-byte Folded Reload
 vilvl.h $vr0, $vr2, $vr0
 vilvl.h $vr1, $vr2, $vr1
 ld.d $a4, $a3, 16
@@ -813,7 +812,7 @@ start_pass: # @start_pass
 xvld $xr5, $sp, 16 # 32-byte Folded Reload
 xvori.b $xr0, $xr5, 0
 vld $vr2, $a3, 16
- xvld $xr3, $sp, 240 # 32-byte Folded Reload
+ xvld $xr3, $sp, 256 # 32-byte Folded Reload
 xvmadd.w $xr0, $xr1, $xr3
 xvsrli.w $xr0, $xr0, 12
 xvst $xr0, $a2, 0
@@ -843,7 +842,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 32
- xvld $xr4, $sp, 208 # 32-byte Folded Reload
+ xvld $xr4, $sp, 224 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr4
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 32
@@ -873,7 +872,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 48
- xvld $xr4, $sp, 176 # 32-byte Folded Reload
+ xvld $xr4, $sp, 192 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr4
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 64
@@ -903,7 +902,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 64
- xvld $xr4, $sp, 144 # 32-byte Folded Reload
+ xvld $xr4, $sp, 160 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr4
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 96
@@ -962,7 +961,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 96
- xvld $xr3, $sp, 112 # 32-byte Folded Reload
+ xvld $xr3, $sp, 128 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr3
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 160
@@ -992,7 +991,7 @@ start_pass: # @start_pass
 xvinsgr2vr.w $xr0, $a4, 7
 xvori.b $xr1, $xr5, 0
 vld $vr2, $a3, 112
- xvld $xr3, $sp, 80 # 32-byte Folded Reload
+ xvld $xr3, $sp, 96 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr3
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 192
@@ -1021,7 +1020,7 @@ start_pass: # @start_pass
 bstrpick.d $a3, $a3, 15, 0
 xvinsgr2vr.w $xr0, $a3, 7
 xvori.b $xr1, $xr5, 0
- xvld $xr2, $sp, 48 # 32-byte Folded Reload
+ xvld $xr2, $sp, 64 # 32-byte Folded Reload
 xvmadd.w $xr1, $xr0, $xr2
 xvsrli.w $xr0, $xr1, 12
 xvst $xr0, $a2, 224
diff --git a/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/getpic.s b/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/getpic.s
index 95848b64..8bcc66f8 100644
--- a/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/getpic.s
+++ b/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/getpic.s
@@ -5,23 +5,23 @@
 .type Decode_Picture,@function
 Decode_Picture: # @Decode_Picture
 # %bb.0:
- addi.d $sp, $sp, -448
- st.d $ra, $sp, 440 # 8-byte Folded Spill
- st.d $fp, $sp, 432 # 8-byte Folded Spill
- st.d $s0, $sp, 424 # 8-byte Folded Spill
- st.d $s1, $sp, 416 # 8-byte Folded Spill
- st.d $s2, $sp, 408 # 8-byte Folded Spill
- st.d $s3, $sp, 400 # 8-byte Folded Spill
- st.d $s4, $sp, 392 # 8-byte Folded Spill
- st.d $s5, $sp, 384 # 8-byte Folded Spill
- st.d $s6, $sp, 376 # 8-byte Folded Spill
- st.d $s7, $sp, 368 # 8-byte Folded Spill
- st.d $s8, $sp, 360 # 8-byte Folded Spill
+ addi.d $sp, $sp, -432
+ st.d $ra, $sp, 424 # 8-byte Folded Spill
+ st.d $fp, $sp, 416 # 8-byte Folded Spill
+ st.d $s0, $sp, 408 # 8-byte Folded Spill
+ st.d $s1, $sp, 400 # 8-byte Folded Spill
+ st.d $s2, $sp, 392 # 8-byte Folded Spill
+ st.d $s3, $sp, 384 # 8-byte Folded Spill
+ st.d $s4, $sp, 376 # 8-byte Folded Spill
+ st.d $s5, $sp, 368 # 8-byte Folded Spill
+ st.d $s6, $sp, 360 # 8-byte Folded Spill
+ st.d $s7, $sp, 352 # 8-byte Folded Spill
+ st.d $s8, $sp, 344 # 8-byte Folded Spill
 st.d $a1, $sp, 48 # 8-byte Folded Spill
 move $s0, $a0
 pcalau12i $a0, %got_pc_hi20(picture_structure)
 ld.d $a0, $a0, %got_pc_lo12(picture_structure)
- st.d $a0, $sp, 240 # 8-byte Folded Spill
+ st.d $a0, $sp, 224 # 8-byte Folded Spill
 ld.w $a3, $a0, 0
 pcalau12i $a0, %got_pc_hi20(Second_Field)
 ld.d $fp, $a0, %got_pc_lo12(Second_Field)
@@ -35,7 +35,7 @@ Decode_Picture: # @Decode_Picture
 addi.d $a0, $a0, %pc_lo12(.Lstr)
 pcaddu18i $ra, %call36(puts)
 jirl $ra, $ra, 0
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a3, $a0, 0
 st.w $zero, $fp, 0
 pcalau12i $a0, %got_pc_hi20(picture_coding_type)
@@ -187,7 +187,7 @@ Decode_Picture: # @Decode_Picture
 pcalau12i $a0, %got_pc_hi20(mb_height)
 ld.d $a0, $a0, %got_pc_lo12(mb_height)
 ld.w $s1, $a0, 0
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $s3, $a0, 0
 pcalau12i $a0, %got_pc_hi20(ld)
 ld.d $fp, $a0, %got_pc_lo12(ld)
@@ -221,7 +221,7 @@ Decode_Picture: # @Decode_Picture
 ld.d $a0, $sp, 48 # 8-byte Folded Reload
 beqz $a0, .LBB0_207
 # %bb.27:
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a1, $a0, 0
 ori $a0, $zero, 3
 ld.d $fp, $sp, 40 # 8-byte Folded Reload
@@ -242,7 +242,7 @@ Decode_Picture: # @Decode_Picture
 ld.d $a0, $a0, %got_pc_lo12(auxframe)
 pcaddu18i $ra, %call36(Write_Frame)
 jirl $ra, $ra, 0
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
 ori $a1, $zero, 3
 bne $a0, $a1, .LBB0_209
@@ -264,14 +264,10 @@ Decode_Picture: # @Decode_Picture
 pcalau12i $a0, %got_pc_hi20(concealment_motion_vectors)
 ld.d $a0, $a0, %got_pc_lo12(concealment_motion_vectors)
 st.d $a0, $sp, 104 # 8-byte Folded Spill
- lu12i.w $a0, 15
- ori $a0, $a0, 2048
- vreplgr2vr.h $vr0, $a0
- vst $vr0, $sp, 208 # 16-byte Folded Spill
 ori $a0, $zero, 2047
 vreplgr2vr.h $vr0, $a0
 vst $vr0, $sp, 192 # 16-byte Folded Spill
- st.d $s7, $sp, 232 # 8-byte Folded Spill
+ st.d $s7, $sp, 216 # 8-byte Folded Spill
 b .LBB0_35
 .p2align 4, , 16
 .LBB0_32: # in Loop: Header=BB0_35 Depth=1
@@ -349,10 +345,10 @@ Decode_Picture: # @Decode_Picture
 mul.d $a1, $a2, $a1
 add.d $a0, $a0, $a1
 addi.w $s0, $a0, -1
- st.w $zero, $sp, 328
- st.d $zero, $sp, 320
+ st.w $zero, $sp, 312
+ st.d $zero, $sp, 304
 xvld $xr0, $sp, 64 # 32-byte Folded Reload
- xvst $xr0, $sp, 288
+ xvst $xr0, $sp, 272
 ld.d $a0, $sp, 120 # 8-byte Folded Reload
 bge $s0, $a0, .LBB0_26
 # %bb.41: # %.preheader.i.i.preheader
 .p2align 4, , 16
 .LBB0_42: # %motion_compensation.exit.i.i
 # in Loop: Header=BB0_43 Depth=2
- ld.d $a0, $sp, 248 # 8-byte Folded Reload
+ ld.d $a0, $sp, 232 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
 ori $a1, $zero, 3148
 ldx.w $a1, $s8, $a1
@@ -381,7 +377,7 @@ Decode_Picture: # @Decode_Picture
 st.d $a1, $sp, 152 # 8-byte Folded Spill
 ld.d $s3, $sp, 160 # 8-byte Folded Reload
 sub.w $s3, $s3, $a0
- ld.d $s7, $sp, 232 # 8-byte Folded Reload
+ ld.d $s7, $sp, 216 # 8-byte Folded Reload
 ld.d $a0, $sp, 120 # 8-byte Folded Reload
 ori $s8, $zero, 1
 bge $s0, $a0, .LBB0_26
@@ -412,15 +408,15 @@ Decode_Picture: # @Decode_Picture
 or $a0, $a0, $a1
 st.d $a0, $fp, 0
 .LBB0_47: # in Loop: Header=BB0_43 Depth=2
- addi.d $a0, $sp, 340
- addi.d $a1, $sp, 260
- addi.d $a2, $sp, 256
- addi.d $a3, $sp, 336
- addi.d $a4, $sp, 356
- addi.d $a5, $sp, 352
- addi.d $a6, $sp, 348
- addi.d $a7, $sp, 344
- addi.d $t0, $sp, 332
+ addi.d $a0, $sp, 324
+ addi.d $a1, $sp, 244
+ addi.d $a2, $sp, 240
+ addi.d $a3, $sp, 320
+ addi.d $a4, $sp, 340
+ addi.d $a5, $sp, 336
+ addi.d $a6, $sp, 332
+ addi.d $a7, $sp, 328
+ addi.d $t0, $sp, 316
 st.d $t0, $sp, 0
 pcaddu18i $ra, %call36(macroblock_modes)
 jirl $ra, $ra, 0
@@ -429,9 +425,9 @@ Decode_Picture: # @Decode_Picture
 ori $t0, $zero, 3
 bnez $a0, .LBB0_206
 # %bb.48: # in Loop: Header=BB0_43 Depth=2
- ld.wu $s2, $sp, 340
+ ld.wu $s2, $sp, 324
 andi $a0, $s2, 16
- ld.d $s0, $sp, 232 # 8-byte Folded Reload
+ ld.d $s0, $sp, 216 # 8-byte Folded Reload
 beqz $a0, .LBB0_75
 # %bb.49: # in Loop: Header=BB0_43 Depth=2
 ori $a0, $zero, 5
@@ -512,18 +508,18 @@ Decode_Picture: # @Decode_Picture
 jirl $ra, $ra, 0
 .LBB0_66: # %._crit_edge.i33.i.i
 # in Loop: Header=BB0_43 Depth=2
- st.w $zero, $sp, 328
- st.d $zero, $sp, 320
+ st.w $zero, $sp, 312
+ st.d $zero, $sp, 304
 pcalau12i $a0, %got_pc_hi20(picture_coding_type)
 ld.d $a0, $a0, %got_pc_lo12(picture_coding_type)
 ld.w $a0, $a0, 0
 ori $a1, $zero, 2
 bne $a0, $a1, .LBB0_68
 # %bb.67: # in Loop: Header=BB0_43 Depth=2
- st.d $zero, $sp, 304
 st.d $zero, $sp, 288
+ st.d $zero, $sp, 272
 .LBB0_68: # in Loop: Header=BB0_43 Depth=2
- ld.d $a1, $sp, 240 # 8-byte Folded Reload
+ ld.d $a1, $sp, 224 # 8-byte Folded Reload
 ld.w $a2, $a1, 0
 ori $a1, $zero, 2
 ori $t0, $zero, 3
 bltu $a2, $a1, .LBB0_70
 # %bb.69: # in Loop: Header=BB0_43 Depth=2
 addi.d $a1, $a2, -2
 sltui $a1, $a1, 1
- st.w $a1, $sp, 276
- st.w $a1, $sp, 272
+ st.w $a1, $sp, 260
+ st.w $a1, $sp, 256
 ori $a1, $zero, 1
 .LBB0_70: # %skipped_macroblock.exit.i.i
 # in Loop: Header=BB0_43 Depth=2
- st.w $a1, $sp, 336
+ st.w $a1, $sp, 320
 addi.d $a0, $a0, -1
- ld.w $s1, $sp, 340
+ ld.w $s1, $sp, 324
 sltui $a0, $a0, 1
 slli.d $a0, $a0, 3
- st.w $a0, $sp, 260
+ st.w $a0, $sp, 244
 bstrins.d $s1, $zero, 0, 0
- st.w $s1, $sp, 340
+ st.w $s1, $sp, 324
 b .LBB0_128
 .LBB0_71: # in Loop: Header=BB0_43 Depth=2
 ori $a3, $zero, 1
 .LBB0_75: # in Loop: Header=BB0_43 Depth=2
 andi $a0, $s2, 8
 andi $a1, $s2, 1
- st.d $a1, $sp, 248 # 8-byte Folded Spill
+ st.d $a1, $sp, 232 # 8-byte Folded Spill
 bnez $a0, .LBB0_78
 # %bb.76: # in Loop: Header=BB0_43 Depth=2
- ld.d $a0, $sp, 248 # 8-byte Folded Reload
+ ld.d $a0, $sp, 232 # 8-byte Folded Reload
 beqz $a0, .LBB0_82
 # %bb.77: # in Loop: Header=BB0_43 Depth=2
 ld.d $a0, $sp, 104 # 8-byte Folded Reload
 ldptr.w $a0, $a0, 3144
 beqz $a0, .LBB0_80
 # %bb.79: # in Loop: Header=BB0_43 Depth=2
- ld.w $a4, $sp, 356
- ld.w $a5, $sp, 352
+ ld.w $a4, $sp, 340
+ ld.w $a5, $sp, 336
 pcalau12i $a0, %got_pc_hi20(f_code)
 ld.d $a0, $a0, %got_pc_lo12(f_code)
 ld.w $a1, $a0, 0
 ld.w $a0, $a0, 4
- ld.w $a3, $sp, 348
- ld.w $a2, $sp, 344
+ ld.w $a3, $sp, 332
+ ld.w $a2, $sp, 328
 addi.w $a6, $a1, -1
 addi.w $a7, $a0, -1
 st.d $a2, $sp, 8
- addi.d $a0, $sp, 288
- addi.d $a1, $sp, 264
- addi.d $a2, $sp, 272
+ addi.d $a0, $sp, 272
+ addi.d $a1, $sp, 248
+ addi.d $a2, $sp, 256
 st.d $a3, $sp, 0
 move $a3, $zero
 pcaddu18i $ra, %call36(motion_vectors)
@@ -602,8 +598,8 @@ Decode_Picture: # @Decode_Picture
 pcalau12i $a0, %got_pc_hi20(full_pel_forward_vector)
 ld.d $a0, $a0, %got_pc_lo12(full_pel_forward_vector)
 ld.w $a6, $a0, 0
- addi.d $a0, $sp, 288
- addi.d $a1, $sp, 264
+ addi.d $a0, $sp, 272
+ addi.d $a1, $sp, 248
 move $a3, $a2
 move $a4, $zero
 move $a5, $zero
@@ -625,19 +621,19 @@ Decode_Picture: # @Decode_Picture
 ldptr.w $a0, $a0, 3144
 beqz $a0, .LBB0_86
 # %bb.85: # in Loop: Header=BB0_43 Depth=2
- ld.w $a4, $sp, 356
- ld.w $a5, $sp, 352
+ ld.w $a4, $sp, 340
+ ld.w $a5, $sp, 336
 pcalau12i $a0, %got_pc_hi20(f_code)
 ld.d $a0, $a0, %got_pc_lo12(f_code)
 ld.w $a1, $a0, 8
 ld.w $a0, $a0, 12
- ld.w $a2, $sp, 344
+ ld.w $a2, $sp, 328
 addi.w $a6, $a1, -1
 addi.w $a7, $a0, -1
 st.d $a2, $sp, 8
- addi.d $a0, $sp, 288
- addi.d $a1, $sp, 264
- addi.d $a2, $sp, 272
+ addi.d $a0, $sp, 272
+ addi.d $a1, $sp, 248
+ addi.d $a2, $sp, 256
 ori $a3, $zero, 1
 st.d $zero, $sp, 0
 pcaddu18i $ra, %call36(motion_vectors)
@@ -655,8 +651,8 @@ Decode_Picture: # @Decode_Picture
 pcalau12i $a0, %got_pc_hi20(full_pel_backward_vector)
 ld.d $a0, $a0, %got_pc_lo12(full_pel_backward_vector)
 ld.w $a6, $a0, 0
- addi.d $a1, $sp, 264
- addi.d $a0, $sp, 296
+ addi.d $a1, $sp, 248
+ addi.d $a0, $sp, 280
 move $a3, $a2
 move $a4, $zero
 move $a5, $zero
@@ -669,7 +665,7 @@ Decode_Picture: # @Decode_Picture
 .p2align 4, , 16
 .LBB0_87: # %.thread.i.i
 # in Loop: Header=BB0_43 Depth=2
- ld.d $a0, $sp, 248 # 8-byte Folded Reload
+ ld.d $a0, $sp, 232 # 8-byte Folded Reload
 beqz $a0, .LBB0_90
 # %bb.88: # %.thread.i.i
 # in Loop: Header=BB0_43 Depth=2
@@ -682,7 +678,7 @@ Decode_Picture: # @Decode_Picture
 jirl $ra, $ra, 0
 ori $t0, $zero, 3
 .LBB0_90: # in Loop: Header=BB0_43 Depth=2
- ld.d $a1, $sp, 232 # 8-byte Folded Reload
+ ld.d $a1, $sp, 216 # 8-byte Folded Reload
 ldptr.w $a0, $a1, 3148
 ld.d $a2, $sp, 168 # 8-byte Folded Reload
 ld.d $a3, $sp, 136 # 8-byte Folded Reload
@@ -697,7 +693,7 @@ Decode_Picture: # @Decode_Picture
 andi $a0, $a3, 2
 bnez $a0, .LBB0_95
 # %bb.94: # in Loop: Header=BB0_43 Depth=2
- ld.d $a0, $sp, 248 # 8-byte Folded Reload
+ ld.d $a0, $sp, 232 # 8-byte Folded Reload
 sltui $a0, $a0, 1
 pcalau12i $a1, %got_pc_hi20(block_count)
 ld.d $a1, $a1, %got_pc_lo12(block_count)
@@ -751,7 +747,7 @@ Decode_Picture: # @Decode_Picture
 b .LBB0_107
 .p2align 4, , 16
 .LBB0_103: # in Loop: Header=BB0_107 Depth=3
- addi.d $a1, $sp, 320
+ addi.d $a1, $sp, 304
 beqz $a0, .LBB0_114
 # %bb.104: # in Loop: Header=BB0_107 Depth=3
 move $a0, $s5
@@ -773,7 +769,7 @@ Decode_Picture: # @Decode_Picture
 # Parent Loop BB0_35 Depth=1
 # Parent Loop BB0_43 Depth=2
 # => This Inner Loop Header: Depth=3
- ld.d $s8, $sp, 232 # 8-byte Folded Reload
+ ld.d $s8, $sp, 216 # 8-byte Folded Reload
 ldptr.w $a0, $s8, 3148
 bne $a0, $s1, .LBB0_109
 # %bb.108: # in Loop: Header=BB0_107 Depth=3
@@ -797,7 +793,7 @@ Decode_Picture: # @Decode_Picture
 # %bb.111: # in Loop: Header=BB0_107 Depth=3
 ori $a0, $zero, 3144
 ldx.w $a0, $s8, $a0
- ld.d $a1, $sp, 248 # 8-byte Folded Reload
+ ld.d $a1, $sp, 232 # 8-byte Folded Reload
 bnez $a1, .LBB0_103
 # %bb.112: # in Loop: Header=BB0_107 Depth=3
 beqz $a0, .LBB0_115
@@ -831,15 +827,15 @@ Decode_Picture: # @Decode_Picture
 jirl $ra, $ra, 0
 ori $t0, $zero, 3
 .LBB0_118: # in Loop: Header=BB0_43 Depth=2
- ld.d $s7, $sp, 232 # 8-byte Folded Reload
+ ld.d $s7, $sp, 216 # 8-byte Folded Reload
 ld.d $s3, $sp, 160 # 8-byte Folded Reload
 ld.d $s1, $sp, 136 # 8-byte Folded Reload
- ld.d $a0, $sp, 248 # 8-byte Folded Reload
+ ld.d $a0, $sp, 232 # 8-byte Folded Reload
 bnez $a0, .LBB0_120
 # %bb.119: # %.thread87.i.i
 # in Loop: Header=BB0_43 Depth=2
- st.w $zero, $sp, 328
- st.d $zero, $sp, 320
+ st.w $zero, $sp, 312
+ st.d $zero, $sp, 304
 andi $a0, $s1, 9
 ori $s8, $zero, 1
 beqz $a0, .LBB0_122
@@ -857,28 +853,28 @@ Decode_Picture: # @Decode_Picture
 ori $a1, $zero, 2
 bne $a0, $a1, .LBB0_125
 # %bb.123: # in Loop: Header=BB0_43 Depth=2
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
- st.d $zero, $sp, 304
 st.d $zero, $sp, 288
+ st.d $zero, $sp, 272
 bne $a0, $t0, .LBB0_140
 # %bb.124: # in Loop: Header=BB0_43 Depth=2
 ori $a0, $zero, 2
- st.w $a0, $sp, 336
+ st.w $a0, $sp, 320
 .LBB0_125: # in Loop: Header=BB0_43 Depth=2
- ld.w $a0, $sp, 256
+ ld.w $a0, $sp, 240
 ori $a1, $zero, 4
 bne $a0, $a1, .LBB0_127
 .LBB0_126: # in Loop: Header=BB0_43 Depth=2
 xvld $xr0, $sp, 64 # 32-byte Folded Reload
- xvst $xr0, $sp, 288
+ xvst $xr0, $sp, 272
 .LBB0_127: # %decode_macroblock.exit.i.i
 # in Loop: Header=BB0_43 Depth=2
 ld.d $s0, $sp, 144 # 8-byte Folded Reload
 .LBB0_128: # in Loop: Header=BB0_43 Depth=2
 pcalau12i $a0, %got_pc_hi20(Two_Streams)
 ld.d $a0, $a0, %got_pc_lo12(Two_Streams)
- st.d $a0, $sp, 248 # 8-byte Folded Spill
+ st.d $a0, $sp, 232 # 8-byte Folded Spill
 ld.w $a0, $a0, 0
 beqz $a0, .LBB0_171
 # %bb.129: # in Loop: Header=BB0_43 Depth=2
@@ -896,19 +892,19 @@ Decode_Picture: # @Decode_Picture
 bne $s3, $s8, .LBB0_141
 .LBB0_133: # %.thread55.i.i.i
 # in Loop: Header=BB0_43 Depth=2
- addi.d $a0, $sp, 356
- addi.d $a1, $sp, 348
- addi.d $a2, $sp, 348
- addi.d $a3, $sp, 348
- addi.d $a4, $sp, 348
- addi.d $a5, $sp, 348
- addi.d $a6, $sp, 348
- addi.d $a7, $sp, 348
- addi.d $t0, $sp, 352
+ addi.d $a0, $sp, 340
+ addi.d $a1, $sp, 332
+ addi.d $a2, $sp, 332
+ addi.d $a3, $sp, 332
+ addi.d $a4, $sp, 332
+ addi.d $a5, $sp, 332
+ addi.d $a6, $sp, 332
+ addi.d $a7, $sp, 332
+ addi.d $t0, $sp, 336
 st.d $t0, $sp, 0
 pcaddu18i $ra, %call36(macroblock_modes)
 jirl $ra, $ra, 0
- ld.wu $a0, $sp, 356
+ ld.wu $a0, $sp, 340
 andi $s0, $a0, 2
 bnez $s0, .LBB0_150
 # %bb.134: # in Loop: Header=BB0_43 Depth=2
@@ -938,11 +934,11 @@ Decode_Picture: # @Decode_Picture
 beq $a0, $s0, .LBB0_132
 b .LBB0_168
 .LBB0_140: # in Loop: Header=BB0_43 Depth=2
- st.w $s8, $sp, 336
+ st.w $s8, $sp, 320
 addi.d $a0, $a0, -2
 sltui $a0, $a0, 1
- st.w $a0, $sp, 272
- ld.w $a0, $sp, 256
+ st.w $a0, $sp, 256
+ ld.w $a0, $sp, 240
 ori $a1, $zero, 4
 beq $a0, $a1, .LBB0_126
 b .LBB0_127
 b .LBB0_166
 .LBB0_143: # in Loop: Header=BB0_43 Depth=2
 xvld $xr0, $sp, 64 # 32-byte Folded Reload
- xvst $xr0, $sp, 288
+ xvst $xr0, $sp, 272
 andi $a0, $s1, 9
 ori $s8, $zero, 1
 beqz $a0, .LBB0_122
 ori $t0, $zero, 3
 b .LBB0_170
 .LBB0_150: # in Loop: Header=BB0_43 Depth=2
- ld.w $a1, $sp, 352
- st.w $a1, $sp, 332
+ ld.w $a1, $sp, 336
+ st.w $a1, $sp, 316
 andi $a0, $a0, 16
 beqz $a0, .LBB0_135
 .LBB0_151: # in Loop: Header=BB0_43 Depth=2
@@ -1086,14 +1082,14 @@ Decode_Picture: # @Decode_Picture
 ld.w $s2, $s1, 0
 b .LBB0_160
 .LBB0_163: # in Loop: Header=BB0_43 Depth=2
- ld.d $s7, $sp, 232 # 8-byte Folded Reload
+ ld.d $s7, $sp, 216 # 8-byte Folded Reload
 ori $s8, $zero, 1
 ori $t0, $zero, 3
 ld.d $s0, $sp, 144 # 8-byte Folded Reload
 ori $s3, $zero, 1
 b .LBB0_165
 .LBB0_164: # in Loop: Header=BB0_43 Depth=2
- ld.d $s7, $sp, 232 # 8-byte Folded Reload
+ ld.d $s7, $sp, 216 # 8-byte Folded Reload
 ori $s8, $zero, 1
 ld.d $s0, $sp, 144 # 8-byte Folded Reload
 .LBB0_165: # %.loopexit.i.i.i
@@ -1144,7 +1140,7 @@ Decode_Picture: # @Decode_Picture
 .LBB0_171: # in Loop: Header=BB0_43 Depth=2
 ld.d $a0, $sp, 112 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
- ld.w $a1, $sp, 332
+ ld.w $a1, $sp, 316
 st.d $a1, $sp, 184 # 8-byte Folded Spill
 div.w $a1, $s0, $a0
 mul.d $a0, $a1, $a0
@@ -1154,12 +1150,12 @@ Decode_Picture: # @Decode_Picture
 slli.w $s5, $a1, 4
 bnez $s6, .LBB0_173
 # %bb.172: # in Loop: Header=BB0_43 Depth=2
- ld.w $a7, $sp, 260
- ld.w $a3, $sp, 336
+ ld.w $a7, $sp, 244
+ ld.w $a3, $sp, 320
 addi.w $a2, $s1, 0
- addi.d $a4, $sp, 288
- addi.d $a5, $sp, 272
- addi.d $a6, $sp, 264
+ addi.d $a4, $sp, 272
+ addi.d $a5, $sp, 256
+ addi.d $a6, $sp, 248
 move $a0, $s4
 move $a1, $s5
 pcaddu18i $ra, %call36(form_predictions)
@@ -1526,7 +1522,7 @@ Decode_Picture: # @Decode_Picture
 .LBB0_179: # Parent Loop BB0_35 Depth=1
 # Parent Loop BB0_43 Depth=2
 # => This Inner Loop Header: Depth=3
- ld.d $a0, $sp, 248 # 8-byte Folded Reload
+ ld.d $a0, $sp, 232 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
 beqz $a0, .LBB0_182
 # %bb.180: # in Loop: Header=BB0_179 Depth=3
@@ -1536,7 +1532,7 @@ Decode_Picture: # @Decode_Picture
 # %bb.181: # %vector.body36
 # in Loop: Header=BB0_179 Depth=3
 add.d $a0, $s8, $s1
- ld.d $a2, $sp, 232 # 8-byte Folded Reload
+ ld.d $a2, $sp, 216 # 8-byte Folded Reload
 add.d $a1, $a2, $s1
 xvldx $xr0, $s8, $s1
 xvld $xr1, $a0, 32
@@ -1567,7 +1563,7 @@ Decode_Picture: # @Decode_Picture
 add.d $a1, $a0, $s1
 vldx $vr0, $a0, $s1
 vld $vr1, $a1, 16
- vld $vr6, $sp, 208 # 16-byte Folded Reload
+ vldi $vr6, -2568
 vmax.h $vr0, $vr0, $vr6
 vmax.h $vr1, $vr1, $vr6
 vld $vr7, $sp, 192 # 16-byte Folded Reload
@@ -1823,7 +1819,7 @@ Decode_Picture: # @Decode_Picture
 ori $t0, $zero, 3
 bltu $t0, $s3, .LBB0_191
 .LBB0_187: # in Loop: Header=BB0_179 Depth=3
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a3, $a0, 0
 pcalau12i $a0, %got_pc_hi20(current_frame)
 ld.d $a0, $a0, %got_pc_lo12(current_frame)
@@ -1859,7 +1855,7 @@ Decode_Picture: # @Decode_Picture
 sltu $a1, $zero, $a1
 sra.w $a1, $s4, $a1
 addi.d $a2, $a4, -1
- ld.d $a3, $sp, 240 # 8-byte Folded Reload
+ ld.d $a3, $sp, 224 # 8-byte Folded Reload
 ld.w $a5, $a3, 0
 sltui $a2, $a2, 1
 sra.w $a3, $s5, $a2
@@ -2351,7 +2347,7 @@ Decode_Picture: # @Decode_Picture
 .p2align 4, , 16
 .LBB0_206: # %decode_macroblock.exit.thread.i.i
 # in Loop: Header=BB0_35 Depth=1
- ld.d $s7, $sp, 232 # 8-byte Folded Reload
+ ld.d $s7, $sp, 216 # 8-byte Folded Reload
 ori $s8, $zero, 1
 b .LBB0_34
 .LBB0_207:
 pcalau12i $a1, %pc_hi20(frame_reorder.Oldref_progressive_frame)
 st.w $a0, $a1, %pc_lo12(frame_reorder.Oldref_progressive_frame)
 ld.d $fp, $sp, 40 # 8-byte Folded Reload
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
 ori $a1, $zero, 3
 bne $a0, $a1, .LBB0_209
@@ -2385,7 +2381,7 @@ Decode_Picture: # @Decode_Picture
 st.w $a0, $fp, 0
 ld.d $fp, $sp, 40 # 8-byte Folded Reload
 st.w $a0, $s0, %pc_lo12(frame_reorder.Oldref_progressive_frame)
- ld.d $a0, $sp, 240 # 8-byte Folded Reload
+ ld.d $a0, $sp, 224 # 8-byte Folded Reload
 ld.w $a0, $a0, 0
 ori $a1, $zero, 3
 beq $a0, $a1, .LBB0_211
@@ -2395,18 +2391,18 @@ Decode_Picture: # @Decode_Picture
 .LBB0_210: # %frame_reorder.exit.thread
 st.w $a1, $fp, 0
 .LBB0_211:
- ld.d $s8, $sp, 360 # 8-byte Folded Reload
- ld.d $s7, $sp, 368 # 8-byte Folded Reload
- ld.d $s6, $sp, 376 # 8-byte Folded Reload
- ld.d $s5, $sp, 384 # 8-byte Folded Reload
- ld.d $s4, $sp, 392 # 8-byte Folded Reload
- ld.d $s3, $sp, 400 # 8-byte Folded Reload
- ld.d $s2, $sp, 408 # 8-byte Folded Reload
- ld.d $s1, $sp, 416 # 8-byte Folded Reload
- ld.d $s0, $sp, 424 # 8-byte Folded Reload
- ld.d $fp, $sp, 432 # 8-byte Folded Reload
- ld.d $ra, $sp, 440 # 8-byte Folded Reload
- addi.d $sp, $sp, 448
+ ld.d $s8, $sp, 344 # 8-byte Folded Reload
+ ld.d $s7, $sp, 352 # 8-byte Folded Reload
+ ld.d $s6, $sp, 360 # 8-byte Folded Reload
+ ld.d $s5, $sp, 368 # 8-byte Folded Reload
+ ld.d $s4, $sp, 376 # 8-byte Folded Reload
+ ld.d $s3, $sp, 384 # 8-byte Folded Reload
+ ld.d $s2, $sp, 392 # 8-byte Folded Reload
+ ld.d $s1, $sp, 400 # 8-byte Folded Reload
+ ld.d $s0, $sp, 408 # 8-byte Folded Reload
+ ld.d $fp, $sp, 416 # 8-byte Folded Reload
+ ld.d $ra, $sp, 424 # 8-byte Folded Reload
+ addi.d $sp, $sp, 432
 ret
 .LBB0_212:
 pcalau12i $a0, %got_pc_hi20(Quiet_Flag)
diff --git a/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/idctref.s b/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/idctref.s
index 778db424..d4c35305 100644
--- a/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/idctref.s
+++ b/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/idctref.s
@@ -558,18 +558,17 @@ Reference_IDCT: # @Reference_IDCT
 xvrepl128vei.d $xr31, $xr1, 0
 xvpermi.d $xr1, $xr3, 68
 xvrepl128vei.d $xr1, $xr1, 0
- addi.d $a2, $sp, 1776
 xvldrepl.d $xr2, $a1, 504
+ addi.d $a1, $sp, 1776
 addi.d $a0, $a0, 112
- addi.w $a1, $zero, -16
- lu52i.d $a3, $zero, 1022
+ addi.w $a2, $zero, -16
 .p2align 4, , 16
 .LBB1_3: # %vector.body
 # =>This Inner Loop Header: Depth=1
- xvld $xr3, $a2, -256
- xvld $xr4, $a2, -192
- xvld $xr5, $a2, -128
- xvld $xr6, $a2, -64
+ xvld $xr3, $a1, -256
+ xvld $xr4, $a1, -192
+ xvld $xr5, $a1, -128
+ xvld $xr6, $a1, -64
 xvld $xr7, $sp, 1488 # 32-byte Folded Reload
 xvfmadd.d $xr7, $xr7, $xr3, $xr0
 xvld $xr8, $sp, 1456 # 32-byte Folded Reload
 xvfmadd.d $xr7, $xr8, $xr5, $xr7
 xvld $xr8, $sp, 1392 # 32-byte Folded Reload
 xvfmadd.d $xr11, $xr8, $xr6, $xr7
- xvld $xr7, $a2, 0
- xvld $xr8, $a2, 64
- xvld $xr9, $a2, 128
- xvld $xr10, $a2, 192
+ xvld $xr7, $a1, 0
+ xvld $xr8, $a1, 64
+ xvld $xr9, $a1, 128
+ xvld $xr10, $a1, 192
 xvld $xr12, $sp, 1360 # 32-byte Folded Reload
 xvfmadd.d $xr11, $xr12, $xr7, $xr11
 xvld $xr12, $sp, 1328 # 32-byte Folded Reload
 xvfmadd.d $xr11, $xr12, $xr9, $xr11
 xvld $xr12, $sp, 1264 # 32-byte Folded Reload
 xvfmadd.d $xr12, $xr12, $xr10, $xr11
- xvreplgr2vr.d $xr11, $a3
+ xvldi $xr11, -928
 xvfadd.d $xr12, $xr12, $xr11
 xvpickve.d $xr13, $xr12, 1
 vreplvei.d $vr13, $vr13, 0
@@ -615,8 +614,8 @@ Reference_IDCT: # @Reference_IDCT
 vrepli.w $vr13, 255
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- add.d $a4, $a0, $a1
- vstelm.d $vr14, $a4, -96, 0
+ add.d $a3, $a0, $a2
+ vstelm.d $vr14, $a3, -96, 0
 xvld $xr14, $sp, 1232 # 32-byte Folded Reload
 xvfmadd.d $xr14, $xr14, $xr3, $xr0
 xvld $xr15, $sp, 1200 # 32-byte Folded Reload
@@ -655,7 +654,7 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr14, $vr14, $vr12
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- vstelm.d $vr14, $a4, -80, 0
+ vstelm.d $vr14, $a3, -80, 0
 xvld $xr14, $sp, 976 # 32-byte Folded Reload
 xvfmadd.d $xr14, $xr14, $xr3, $xr0
 xvld $xr15, $sp, 944 # 32-byte Folded Reload
@@ -694,7 +693,7 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr14, $vr14, $vr12
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- vstelm.d $vr14, $a4, -64, 0
+ vstelm.d $vr14, $a3, -64, 0
 xvld $xr14, $sp, 720 # 32-byte Folded Reload
 xvfmadd.d $xr14, $xr14, $xr3, $xr0
 xvld $xr15, $sp, 688 # 32-byte Folded Reload
@@ -733,7 +732,7 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr14, $vr14, $vr12
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- vstelm.d $vr14, $a4, -48, 0
+ vstelm.d $vr14, $a3, -48, 0
 xvld $xr14, $sp, 464 # 32-byte Folded Reload
 xvfmadd.d $xr14, $xr14, $xr3, $xr0
 xvld $xr15, $sp, 432 # 32-byte Folded Reload
@@ -772,7 +771,7 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr14, $vr14, $vr12
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- vstelm.d $vr14, $a4, -32, 0
+ vstelm.d $vr14, $a3, -32, 0
 xvld $xr14, $sp, 208 # 32-byte Folded Reload
 xvfmadd.d $xr14, $xr14, $xr3, $xr0
 xvld $xr15, $sp, 176 # 32-byte Folded Reload
@@ -810,7 +809,7 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr14, $vr14, $vr12
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- vstelm.d $vr14, $a4, -16, 0
+ vstelm.d $vr14, $a3, -16, 0
 xvfmadd.d $xr14, $xr18, $xr3, $xr0
 xvfmadd.d $xr14, $xr19, $xr4, $xr14
 xvfmadd.d $xr14, $xr20, $xr5, $xr14
@@ -841,7 +840,7 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr14, $vr14, $vr12
 vmin.w $vr14, $vr14, $vr13
 vpickev.h $vr14, $vr14, $vr14
- vstelm.d $vr14, $a4, 0, 0
+ vstelm.d $vr14, $a3, 0, 0
 xvfmadd.d $xr3, $xr26, $xr3, $xr0
 xvfmadd.d $xr3, $xr27, $xr4, $xr3
 xvfmadd.d $xr3, $xr28, $xr5, $xr3
@@ -872,10 +871,10 @@ Reference_IDCT: # @Reference_IDCT
 vmax.w $vr3, $vr3, $vr12
 vmin.w $vr3, $vr3, $vr13
 vpickev.h $vr3, $vr3, $vr3
- vstelm.d $vr3, $a4, 16, 0
- addi.d $a1, $a1, 8
- addi.d $a2, $a2, 32
- bnez $a1, .LBB1_3
+ vstelm.d $vr3, $a3, 16, 0
+ addi.d $a2, $a2, 8
+ addi.d $a1, $a1, 32
+ bnez $a2, .LBB1_3
# %bb.4: # %middle.block
 addi.d $sp, $sp, 64
fld.d $fs7, $sp, 1968 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s b/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s index b5323e18..477cce7d 100644 --- a/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s +++ b/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s @@ -8935,8 +8935,7 @@ DoNNetIteration: # @DoNNetIteration xvld $xr2, $sp, 1440 # 32-byte Folded Reload xvinsve0.d $xr1, $xr2, 2 xvinsve0.d $xr1, $xr0, 3 - lu52i.d $a0, $zero, 1023 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -912 xvst $xr0, $sp, 640 # 32-byte Folded Spill xvfadd.d $xr0, $xr1, $xr0 xvfrecip.d $xr0, $xr0 @@ -10968,8 +10967,7 @@ DoNNetIteration: # @DoNNetIteration ld.d $a0, $sp, 56 # 8-byte Folded Reload fst.d $fa0, $a0, %pc_lo12(average_error) fcmp.cult.d $fcc0, $fa1, $fa2 - lu52i.d $a0, $zero, 1027 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -976 bcnez $fcc0, .LBB15_82 # %bb.72: # %.lr.ph.split.i.preheader # in Loop: Header=BB15_25 Depth=2 diff --git a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s index 38b7b1ef..7c0309ac 100644 --- a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s +++ b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s @@ -8396,8 +8396,7 @@ _ZN18CanonicalCenteringILi2EEC2Ev: # @_ZN18CanonicalCenteringILi2EEC2Ev ld.d $a0, $sp, 464 beq $s3, $a0, .LBB122_16 .LBB122_8: - lu52i.d $a0, $zero, 1022 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -928 vst $vr0, $s3, 0 addi.d $a0, $s3, 16 st.d $a0, $sp, 456 @@ -8493,10 +8492,9 @@ _ZN18CanonicalCenteringILi2EEC2Ev: # @_ZN18CanonicalCenteringILi2EEC2Ev pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 move $s1, $a0 - lu52i.d $a0, $zero, 1022 - vreplgr2vr.d $vr0, $a0 - vstx $vr0, $s1, $s0 - move $s2, $s1 + vldi $vr0, -928 + vstx $vr0, $a0, $s0 + move $s2, $a0 beq $fp, $s3, .LBB122_20 # %bb.18: # %.lr.ph.i.i.i.i.i.i.i.preheader move $s2, $s1 @@ -10716,8 +10714,7 @@ _ZN18CanonicalCenteringILi2EEC2Ev: # @_ZN18CanonicalCenteringILi2EEC2Ev st.d $s5, $sp, 432 beq $s6, $s3, .LBB122_335 .LBB122_297: - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -912 vst $vr0, $s6, 0 addi.d $s6, $s6, 16 st.d $s6, $sp, 456 @@ -11072,10 +11069,9 @@ _ZN18CanonicalCenteringILi2EEC2Ev: # @_ZN18CanonicalCenteringILi2EEC2Ev pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 move $s1, $a0 - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 - vstx $vr0, $s1, $s0 - move $s2, $s1 + vldi $vr0, -912 + vstx $vr0, $a0, $s0 + move $s2, $a0 beq $fp, $s3, .LBB122_339 # %bb.337: # %.lr.ph.i.i.i.i.i.i.i476.preheader move $s2, $s1 @@ -12549,9 +12545,9 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev ld.d $a0, $sp, 656 beq $s2, $a0, .LBB130_16 .LBB130_8: - lu52i.d $a0, $zero, 1022 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -928 vst $vr0, $s2, 0 + lu52i.d $a0, $zero, 1022 st.d $a0, $s2, 16 addi.d $a0, $s2, 24 st.d $a0, $sp, 648 @@ -12668,9 +12664,9 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev jirl $ra, $ra, 0 move $s1, $a0 add.d $a0, $a0, $s0 - lu52i.d $a1, $zero, 1022 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -928 vstx $vr0, $s1, $s0 + lu52i.d $a1, $zero, 1022 st.d $a1, $a0, 16 move $s4, $s1 beq $fp, $s2, .LBB130_20 @@ -14266,8 +14262,8 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev st.d $zero, $s3, 0 ori $a0, $zero, 1 st.w $a0, $s3, 8 - 
addi.d $s4, $s3, 12 - st.d $s4, $sp, 592 + addi.d $s5, $s3, 12 + st.d $s5, $sp, 592 b .LBB130_182 .LBB130_175: ld.d $fp, $sp, 584 @@ -14332,63 +14328,62 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB130_181: # %_ZNSt6vectorI3LocILi3EESaIS1_EE17_M_realloc_appendIJRKS1_EEEvDpOT_.exit898 - addi.d $s4, $s4, 12 + addi.d $s5, $s4, 12 st.d $s1, $sp, 584 - st.d $s4, $sp, 592 + st.d $s5, $sp, 592 ori $a0, $zero, 12 mul.d $a0, $s2, $a0 add.d $s2, $s1, $a0 st.d $s2, $sp, 600 .LBB130_182: # %_ZNSt6vectorI3LocILi3EESaIS1_EE9push_backERKS1_.exit.i157 - ld.d $s5, $sp, 448 + ld.d $s4, $sp, 448 ld.d $s3, $sp, 456 pcalau12i $a0, %pc_hi20(.LCPI130_4) st.d $a0, $sp, 16 # 8-byte Folded Spill - beq $s5, $s3, .LBB130_188 + beq $s4, $s3, .LBB130_188 # %bb.183: vld $vr0, $a0, %pc_lo12(.LCPI130_4) - vst $vr0, $s5, 0 + vst $vr0, $s4, 0 lu52i.d $a0, $zero, 1022 - st.d $a0, $s5, 16 - addi.d $s5, $s5, 24 - st.d $s5, $sp, 448 - beq $s4, $s2, .LBB130_195 + st.d $a0, $s4, 16 + addi.d $s4, $s4, 24 + st.d $s4, $sp, 448 + beq $s5, $s2, .LBB130_195 .LBB130_184: - st.d $zero, $s4, 0 + st.d $zero, $s5, 0 ori $a0, $zero, 1 - st.w $a0, $s4, 8 - addi.d $s4, $s4, 12 - st.d $s4, $sp, 592 - beq $s5, $s3, .LBB130_202 + st.w $a0, $s5, 8 + addi.d $s5, $s5, 12 + st.d $s5, $sp, 592 + beq $s4, $s3, .LBB130_202 .LBB130_185: - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $s5, 0 + vldi $vr0, -912 + vst $vr0, $s4, 0 lu52i.d $a0, $zero, 1022 - st.d $a0, $s5, 16 - addi.d $s5, $s5, 24 - st.d $s5, $sp, 448 - beq $s4, $s2, .LBB130_209 + st.d $a0, $s4, 16 + addi.d $s4, $s4, 24 + st.d $s4, $sp, 448 + beq $s5, $s2, .LBB130_209 .LBB130_186: - st.d $zero, $s4, 0 + st.d $zero, $s5, 0 ori $a0, $zero, 1 - st.w $a0, $s4, 8 - addi.d $a0, $s4, 12 + st.w $a0, $s5, 8 + addi.d $a0, $s5, 12 st.d $a0, $sp, 592 pcalau12i $a0, %pc_hi20(.LCPI130_5) st.d $a0, $sp, 32 # 8-byte Folded Spill - beq $s5, $s3, .LBB130_216 + beq $s4, $s3, .LBB130_216 .LBB130_187: vld $vr0, $a0, %pc_lo12(.LCPI130_5) - vst $vr0, $s5, 0 + vst $vr0, $s4, 0 lu52i.d $a0, $zero, 1022 - st.d $a0, $s5, 16 - addi.d $a0, $s5, 24 + st.d $a0, $s4, 16 + addi.d $a0, $s4, 24 st.d $a0, $sp, 448 b .LBB130_223 .LBB130_188: ld.d $fp, $sp, 440 - sub.d $s0, $s5, $fp + sub.d $s0, $s4, $fp addi.w $a0, $zero, -8 lu52i.d $a0, $a0, 2047 beq $s0, $a0, .LBB130_835 @@ -14428,7 +14423,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev lu52i.d $a1, $zero, 1022 st.d $a1, $a0, 16 move $s6, $s1 - beq $fp, $s5, .LBB130_192 + beq $fp, $s4, .LBB130_192 # %bb.190: # %.lr.ph.i.i.i.i.i.i880.preheader move $s6, $s1 move $a0, $fp @@ -14443,7 +14438,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev fst.d $fa0, $s6, 16 addi.d $a0, $a0, 24 addi.d $s6, $s6, 24 - bne $a0, $s5, .LBB130_191 + bne $a0, $s4, .LBB130_191 .LBB130_192: # %_ZSt34__uninitialized_move_if_noexcept_aIP6VectorILi3Ed4FullES3_SaIS2_EET0_T_S6_S5_RT1_.exit.i884 beqz $fp, .LBB130_194 # %bb.193: @@ -14451,17 +14446,17 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev move $a1, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $s4, $sp, 592 + ld.d $s5, $sp, 592 ld.d $s2, $sp, 600 .LBB130_194: # %_ZNSt6vectorI6VectorILi3Ed4FullESaIS2_EE17_M_realloc_appendIJRKS2_EEEvDpOT_.exit887 - addi.d $s5, $s6, 24 + addi.d $s4, $s6, 24 st.d $s1, $sp, 440 - st.d $s5, $sp, 448 + st.d $s4, $sp, 448 ori $a0, $zero, 24 mul.d $a0, $s3, $a0 add.d $s3, $s1, $a0 st.d $s3, $sp, 456 - bne $s4, $s2, .LBB130_184 + bne 
$s5, $s2, .LBB130_184 .LBB130_195: ld.d $fp, $sp, 584 sub.d $s0, $s2, $fp @@ -14475,10 +14470,10 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev lu32i.d $a1, -349526 lu52i.d $a1, $a1, -1366 mul.d $a0, $a0, $a1 - ori $s4, $zero, 1 - sltu $a1, $s4, $a0 + ori $s5, $zero, 1 + sltu $a1, $s5, $a0 maskeqz $a2, $a0, $a1 - masknez $a1, $s4, $a1 + masknez $a1, $s5, $a1 or $a1, $a2, $a1 add.d $a0, $a1, $a0 sltu $a1, $a0, $a1 @@ -14499,23 +14494,23 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev move $s1, $a0 add.d $a0, $a0, $s0 stx.d $zero, $s1, $s0 - st.w $s4, $a0, 8 - move $s4, $s1 + st.w $s5, $a0, 8 + move $s5, $s1 beq $fp, $s2, .LBB130_199 # %bb.197: # %.lr.ph.i.i.i.i.i.i913.preheader - move $s4, $s1 + move $s5, $s1 move $a0, $fp .p2align 4, , 16 .LBB130_198: # %.lr.ph.i.i.i.i.i.i913 # =>This Inner Loop Header: Depth=1 ld.w $a1, $a0, 0 - st.w $a1, $s4, 0 + st.w $a1, $s5, 0 ld.w $a1, $a0, 4 - st.w $a1, $s4, 4 + st.w $a1, $s5, 4 ld.w $a1, $a0, 8 - st.w $a1, $s4, 8 + st.w $a1, $s5, 8 addi.d $a0, $a0, 12 - addi.d $s4, $s4, 12 + addi.d $s5, $s5, 12 bne $a0, $s2, .LBB130_198 .LBB130_199: # %_ZSt34__uninitialized_move_if_noexcept_aIP3LocILi3EES2_SaIS1_EET0_T_S5_S4_RT1_.exit.i917 beqz $fp, .LBB130_201 @@ -14524,17 +14519,17 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev move $a1, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $s5, $sp, 448 + ld.d $s4, $sp, 448 ld.d $s3, $sp, 456 .LBB130_201: # %_ZNSt6vectorI3LocILi3EESaIS1_EE17_M_realloc_appendIJRKS1_EEEvDpOT_.exit920 - addi.d $s4, $s4, 12 + addi.d $s5, $s5, 12 st.d $s1, $sp, 584 - st.d $s4, $sp, 592 + st.d $s5, $sp, 592 ori $a0, $zero, 12 mul.d $a0, $s6, $a0 add.d $s2, $s1, $a0 st.d $s2, $sp, 600 - bne $s5, $s3, .LBB130_185 + bne $s4, $s3, .LBB130_185 .LBB130_202: ld.d $fp, $sp, 440 sub.d $s0, $s3, $fp @@ -14571,27 +14566,26 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev jirl $ra, $ra, 0 move $s1, $a0 add.d $a0, $a0, $s0 - lu52i.d $a1, $zero, 1023 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -912 vstx $vr0, $s1, $s0 lu52i.d $a1, $zero, 1022 st.d $a1, $a0, 16 - move $s5, $s1 + move $s4, $s1 beq $fp, $s3, .LBB130_206 # %bb.204: # %.lr.ph.i.i.i.i.i.i902.preheader - move $s5, $s1 + move $s4, $s1 move $a0, $fp .p2align 4, , 16 .LBB130_205: # %.lr.ph.i.i.i.i.i.i902 # =>This Inner Loop Header: Depth=1 fld.d $fa0, $a0, 0 - fst.d $fa0, $s5, 0 + fst.d $fa0, $s4, 0 fld.d $fa0, $a0, 8 - fst.d $fa0, $s5, 8 + fst.d $fa0, $s4, 8 fld.d $fa0, $a0, 16 - fst.d $fa0, $s5, 16 + fst.d $fa0, $s4, 16 addi.d $a0, $a0, 24 - addi.d $s5, $s5, 24 + addi.d $s4, $s4, 24 bne $a0, $s3, .LBB130_205 .LBB130_206: # %_ZSt34__uninitialized_move_if_noexcept_aIP6VectorILi3Ed4FullES3_SaIS2_EET0_T_S6_S5_RT1_.exit.i906 beqz $fp, .LBB130_208 @@ -14600,17 +14594,17 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev move $a1, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $s4, $sp, 592 + ld.d $s5, $sp, 592 ld.d $s2, $sp, 600 .LBB130_208: # %_ZNSt6vectorI6VectorILi3Ed4FullESaIS2_EE17_M_realloc_appendIJRKS2_EEEvDpOT_.exit909 - addi.d $s5, $s5, 24 + addi.d $s4, $s4, 24 st.d $s1, $sp, 440 - st.d $s5, $sp, 448 + st.d $s4, $sp, 448 ori $a0, $zero, 24 mul.d $a0, $s6, $a0 add.d $s3, $s1, $a0 st.d $s3, $sp, 456 - bne $s4, $s2, .LBB130_186 + bne $s5, $s2, .LBB130_186 .LBB130_209: ld.d $fp, $sp, 584 sub.d $s0, $s2, $fp @@ -14640,9 +14634,9 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev or $a0, $a0, $a3 masknez $a0, $a0, $a1 maskeqz 
$a1, $a2, $a1 - or $s4, $a1, $a0 - slli.d $a0, $s4, 3 - alsl.d $a0, $s4, $a0, 2 + or $s5, $a1, $a0 + slli.d $a0, $s5, 3 + alsl.d $a0, $s5, $a0, 2 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 move $s1, $a0 @@ -14673,19 +14667,19 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev move $a1, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $s5, $sp, 448 + ld.d $s4, $sp, 448 ld.d $s3, $sp, 456 .LBB130_215: # %_ZNSt6vectorI3LocILi3EESaIS1_EE17_M_realloc_appendIJRKS1_EEEvDpOT_.exit942 addi.d $a0, $s6, 12 st.d $s1, $sp, 584 st.d $a0, $sp, 592 ori $a0, $zero, 12 - mul.d $a0, $s4, $a0 + mul.d $a0, $s5, $a0 add.d $a0, $s1, $a0 st.d $a0, $sp, 600 pcalau12i $a0, %pc_hi20(.LCPI130_5) st.d $a0, $sp, 32 # 8-byte Folded Spill - bne $s5, $s3, .LBB130_187 + bne $s4, $s3, .LBB130_187 .LBB130_216: ld.d $fp, $sp, 440 sub.d $s0, $s3, $fp @@ -16671,8 +16665,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev ld.d $a0, $sp, 432 beq $s2, $a0, .LBB130_463 .LBB130_441: - lu52i.d $a0, $zero, 1022 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -928 vst $vr0, $s2, 0 st.d $zero, $s2, 16 addi.d $a0, $s2, 24 @@ -16927,8 +16920,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev jirl $ra, $ra, 0 move $s1, $a0 add.d $a0, $a0, $s0 - lu52i.d $a1, $zero, 1022 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -928 vstx $vr0, $s1, $s0 st.d $zero, $a0, 16 move $s4, $s1 @@ -16987,8 +16979,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev ld.d $a0, $sp, 456 beq $s2, $a0, .LBB130_480 .LBB130_472: - lu52i.d $a0, $zero, 1022 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -928 vst $vr0, $s2, 0 lu52i.d $a0, $zero, 1023 st.d $a0, $s2, 16 @@ -17100,8 +17091,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev jirl $ra, $ra, 0 move $s1, $a0 add.d $a0, $a0, $s0 - lu52i.d $a1, $zero, 1022 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -928 vstx $vr0, $s1, $s0 lu52i.d $a1, $zero, 1023 st.d $a1, $a0, 16 @@ -18768,8 +18758,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev st.d $s4, $sp, 624 beq $s5, $s3, .LBB130_729 .LBB130_683: - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -912 vst $vr0, $s5, 0 st.d $zero, $s5, 16 addi.d $s5, $s5, 24 @@ -18811,9 +18800,9 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev st.d $s4, $sp, 624 beq $s5, $s3, .LBB130_771 .LBB130_689: - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -912 vst $vr0, $s5, 0 + lu52i.d $a0, $zero, 1023 st.d $a0, $s5, 16 addi.d $s5, $s5, 24 st.d $s5, $sp, 648 @@ -19239,8 +19228,7 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev jirl $ra, $ra, 0 move $s1, $a0 add.d $a0, $a0, $s0 - lu52i.d $a1, $zero, 1023 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -912 vstx $vr0, $s1, $s0 st.d $zero, $a0, 16 move $s5, $s1 @@ -19669,9 +19657,9 @@ _ZN18CanonicalCenteringILi3EEC2Ev: # @_ZN18CanonicalCenteringILi3EEC2Ev jirl $ra, $ra, 0 move $s1, $a0 add.d $a0, $a0, $s0 - lu52i.d $a1, $zero, 1023 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -912 vstx $vr0, $s1, $s0 + lu52i.d $a1, $zero, 1023 st.d $a1, $a0, 16 move $s5, $s1 beq $fp, $s3, .LBB130_775 @@ -23427,11 +23415,11 @@ main: # @main move $a1, $fp pcaddu18i $ra, %call36(_Z15handle_cmd_argsiPPc) jirl $ra, $ra, 0 - lu52i.d $a0, $zero, -1025 - vreplgr2vr.d $vr0, $a0 - pcalau12i $a1, %pc_hi20(origin) - addi.d $s0, $a1, %pc_lo12(origin) + pcalau12i $a0, %pc_hi20(origin) + addi.d $s0, $a0, %pc_lo12(origin) + vldi $vr0, -784 vst $vr0, $s0, 0 + lu52i.d $a0, $zero, -1025 
st.d $a0, $s0, 16 pcalau12i $a0, %pc_hi20(vdom) addi.d $a0, $a0, %pc_lo12(vdom) @@ -130414,8 +130402,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR add.d $a5, $fp, $a0 add.d $s2, $s1, $a0 add.d $ra, $s1, $t5 - lu52i.d $a0, $zero, 1020 - xvreplgr2vr.d $xr0, $a0 add.d $a0, $t1, $a6 st.d $a0, $sp, 312 # 8-byte Folded Spill add.w $a0, $a3, $t8 @@ -130439,7 +130425,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR alsl.d $a0, $a0, $a2, 2 st.d $a0, $sp, 232 # 8-byte Folded Spill ld.d $a2, $sp, 224 # 8-byte Folded Reload - vldi $vr1, -960 + vldi $vr0, -960 + xvldi $xr1, -960 st.d $a3, $sp, 40 # 8-byte Folded Spill addi.d $a0, $a3, 1 st.d $a0, $sp, 16 # 8-byte Folded Spill @@ -130600,7 +130587,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR fadd.d $fa3, $fa4, $fa5 fmul.d $fa3, $fa3, $fa3 fadd.d $fa2, $fa2, $fa3 - fmul.d $fa2, $fa2, $fa1 + fmul.d $fa2, $fa2, $fa0 add.w $s1, $a4, $a0 slli.d $s1, $s1, 3 fstx.d $fa2, $t7, $s1 @@ -130778,7 +130765,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR xvfadd.d $xr3, $xr4, $xr5 xvfmul.d $xr3, $xr3, $xr3 xvfadd.d $xr2, $xr2, $xr3 - xvfmul.d $xr2, $xr2, $xr0 + xvfmul.d $xr2, $xr2, $xr1 add.w $a4, $a6, $a0 slli.d $a4, $a4, 3 xvstx $xr2, $t7, $a4 @@ -155485,8 +155472,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $a3, $a0, 3 addi.d $a3, $a3, 8 add.d $t6, $a2, $t6 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 add.d $a2, $a1, $a3 st.d $a2, $sp, 200 # 8-byte Folded Spill add.d $a2, $a7, $a3 @@ -155500,7 +155485,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a2, $sp, 192 # 8-byte Folded Spill alsl.d $a0, $a0, $t8, 2 st.d $a0, $sp, 176 # 8-byte Folded Spill - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 st.d $s8, $sp, 40 # 8-byte Folded Spill addi.d $s8, $s8, -1 st.d $ra, $sp, 32 # 8-byte Folded Spill @@ -155615,8 +155601,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $t1, $t1, 3 fldx.d $fa5, $t2, $t1 slli.d $a2, $a2, 3 - fmul.d $fa3, $fa3, $fa1 - fmadd.d $fa2, $fa2, $fa1, $fa3 + fmul.d $fa3, $fa3, $fa0 + fmadd.d $fa2, $fa2, $fa0, $fa3 fsub.d $fa3, $fa4, $fa5 fld.d $fa4, $t5, 0 fldx.d $fa5, $a1, $a2 @@ -155758,8 +155744,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR add.w $t1, $a3, $a4 slli.d $t1, $t1, 3 xvldx $xr6, $t2, $t1 - xvfmul.d $xr4, $xr4, $xr0 - xvfmadd.d $xr3, $xr3, $xr0, $xr4 + xvfmul.d $xr4, $xr4, $xr1 + xvfmadd.d $xr3, $xr3, $xr1, $xr4 xvldx $xr4, $a1, $a2 xvfsub.d $xr5, $xr5, $xr6 xvbitrevi.d $xr5, $xr5, 63 @@ -158449,18 +158435,16 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR alsl.d $t7, $t7, $t8, 3 add.d $t6, $t7, $t6 pcalau12i $t7, %pc_hi20(.LCPI721_0) - vld $vr1, $t7, %pc_lo12(.LCPI721_0) + vld $vr0, $t7, %pc_lo12(.LCPI721_0) addi.d $t7, $t2, -1 mul.d $t7, $t0, $t7 add.w $a4, $a4, $t7 - vreplgr2vr.w $vr2, $a2 + vreplgr2vr.w $vr1, $a2 add.d $a2, $s8, $a2 addi.d $a0, $a0, 136 st.d $a0, $sp, 200 # 8-byte Folded Spill add.d $t2, $a3, $t7 add.d $t7, $a3, $a6 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr0, $a0 slli.d $a0, $t1, 3 addi.d $a0, $a0, 8 add.d $a3, $a1, $a0 @@ -158476,8 +158460,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a3, $sp, 184 # 8-byte Folded Spill alsl.d $a0, $a0, $t3, 2 st.d $a0, $sp, 160 # 8-byte Folded Spill - 
vadd.w $vr1, $vr2, $vr1 - vldi $vr2, -928 + vadd.w $vr0, $vr1, $vr0 + vldi $vr1, -928 + xvldi $xr2, -928 st.d $a2, $sp, 16 # 8-byte Folded Spill addi.d $a0, $a2, 1 st.d $a0, $sp, 8 # 8-byte Folded Spill @@ -158590,8 +158575,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR mul.d $t6, $t6, $a6 fldx.d $fa6, $t4, $t6 slli.d $t2, $t2, 3 - fmul.d $fa4, $fa4, $fa2 - fmadd.d $fa3, $fa3, $fa2, $fa4 + fmul.d $fa4, $fa4, $fa1 + fmadd.d $fa3, $fa3, $fa1, $fa4 fsub.d $fa4, $fa5, $fa6 fld.d $fa5, $t5, 0 fldx.d $fa6, $a1, $t2 @@ -158739,7 +158724,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR move $a3, $ra move $s6, $s2 move $s1, $s3 - vori.b $vr6, $vr1, 0 + vori.b $vr6, $vr0, 0 .p2align 4, , 16 .LBB721_24: # %vector.body # Parent Loop BB721_6 Depth=1 @@ -158750,8 +158735,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR xvldx $xr7, $a7, $t2 xvldx $xr8, $a7, $a0 slli.d $a0, $s1, 3 - xvfmul.d $xr7, $xr7, $xr0 - xvfmadd.d $xr7, $xr8, $xr0, $xr7 + xvfmul.d $xr7, $xr7, $xr2 + xvfmadd.d $xr7, $xr8, $xr2, $xr7 vadd.w $vr8, $vr6, $vr5 vpickve2gr.w $t2, $vr8, 3 vpickve2gr.w $t6, $vr8, 2 @@ -159966,11 +159951,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR alsl.d $a4, $t7, $a4, 3 add.d $a4, $a4, $t6 add.d $t6, $a6, $a1 - vreplgr2vr.w $vr1, $a1 + vreplgr2vr.w $vr0, $a1 pcalau12i $a1, %pc_hi20(.LCPI731_0) - vld $vr2, $a1, %pc_lo12(.LCPI731_0) - lu52i.d $a1, $zero, 1022 - xvreplgr2vr.d $xr0, $a1 + vld $vr1, $a1, %pc_lo12(.LCPI731_0) slli.d $a1, $a3, 3 addi.d $a1, $a1, 8 addi.d $ra, $t2, 128 @@ -159989,8 +159972,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a3, $sp, 200 # 8-byte Folded Spill alsl.d $a1, $a1, $a2, 2 st.d $a1, $sp, 160 # 8-byte Folded Spill - vadd.w $vr1, $vr1, $vr2 - vldi $vr2, -928 + vadd.w $vr0, $vr0, $vr1 + vldi $vr1, -928 + xvldi $xr2, -928 st.d $t6, $sp, 16 # 8-byte Folded Spill addi.d $a1, $t6, 1 st.d $a1, $sp, 8 # 8-byte Folded Spill @@ -160111,8 +160095,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR fldx.d $fa6, $t5, $t6 mul.d $t3, $t3, $s5 fldx.d $fa7, $t5, $t3 - fmul.d $fa5, $fa5, $fa2 - fmadd.d $fa4, $fa4, $fa2, $fa5 + fmul.d $fa5, $fa5, $fa1 + fmadd.d $fa4, $fa4, $fa1, $fa5 fld.d $fa5, $ra, 0 fsub.d $fa6, $fa6, $fa7 addi.w $t3, $a4, 0 @@ -160270,7 +160254,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR move $t4, $s0 move $s3, $s6 move $s1, $s4 - vori.b $vr7, $vr1, 0 + vori.b $vr7, $vr0, 0 .p2align 4, , 16 .LBB731_24: # %vector.body # Parent Loop BB731_5 Depth=1 @@ -160282,8 +160266,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR xvldx $xr9, $t0, $a6 slli.d $a6, $s1, 3 xvldx $xr10, $a5, $a6 - xvfmul.d $xr8, $xr8, $xr0 - xvfmadd.d $xr8, $xr9, $xr0, $xr8 + xvfmul.d $xr8, $xr8, $xr2 + xvfmadd.d $xr8, $xr9, $xr2, $xr8 vadd.w $vr9, $vr7, $vr6 vpickve2gr.w $t3, $vr9, 3 vpickve2gr.w $t6, $vr9, 2 @@ -162340,8 +162324,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR st.d $a0, $sp, 120 # 8-byte Folded Spill ori $s6, $zero, 7 vldi $vr0, -928 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -928 st.d $a2, $sp, 24 # 8-byte Folded Spill st.d $s3, $sp, 32 # 8-byte Folded Spill st.d $t6, $sp, 48 # 8-byte Folded Spill @@ -163433,8 +163416,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR add.d $s6, $a0, $a3 ori 
$s7, $zero, 7 vldi $vr0, -928 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -928 st.d $s8, $sp, 24 # 8-byte Folded Spill st.d $ra, $sp, 16 # 8-byte Folded Spill st.d $fp, $sp, 32 # 8-byte Folded Spill @@ -164478,12 +164460,12 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR blt $a4, $a2, .LBB763_1 # %bb.4: # %.preheader25.lr.ph.split.split.i ld.d $a4, $a0, 0 - ld.d $a7, $a4, 80 - ld.d $t0, $a4, 72 + ld.d $a0, $a4, 80 + ld.d $a7, $a4, 72 move $t6, $zero - ld.d $a0, $a7, 8 + ld.d $a0, $a0, 8 ori $t1, $zero, 88 - mul.d $a7, $t0, $t1 + mul.d $a7, $a7, $t1 ld.d $t0, $a4, 224 add.d $a0, $a0, $a7 ld.d $a7, $a0, 72 @@ -164525,8 +164507,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR st.d $a0, $sp, 136 # 8-byte Folded Spill ori $s7, $zero, 7 vldi $vr0, -928 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -928 st.d $s8, $sp, 24 # 8-byte Folded Spill st.d $ra, $sp, 16 # 8-byte Folded Spill st.d $s4, $sp, 32 # 8-byte Folded Spill @@ -186309,9 +186290,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR add.d $t6, $t7, $t6 add.d $t7, $a4, $a6 st.d $t7, $sp, 24 # 8-byte Folded Spill - vreplgr2vr.w $vr1, $a2 + vreplgr2vr.w $vr0, $a2 pcalau12i $a2, %pc_hi20(.LCPI857_0) - vld $vr2, $a2, %pc_lo12(.LCPI857_0) + vld $vr1, $a2, %pc_lo12(.LCPI857_0) mul.d $a2, $a5, $t2 add.d $a4, $a4, $a2 st.d $a4, $sp, 16 # 8-byte Folded Spill @@ -186319,8 +186300,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a0, $sp, 184 # 8-byte Folded Spill add.d $a4, $a3, $a2 add.d $a3, $a3, $a6 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr0, $a0 slli.d $a0, $t3, 3 addi.d $a0, $a0, 8 add.d $a2, $a1, $a0 @@ -186336,8 +186315,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a2, $sp, 168 # 8-byte Folded Spill alsl.d $a0, $a0, $t1, 2 st.d $a0, $sp, 152 # 8-byte Folded Spill - vadd.w $vr1, $vr1, $vr2 - vldi $vr2, -928 + vadd.w $vr0, $vr0, $vr1 + vldi $vr1, -928 + xvldi $xr2, -928 addi.d $a0, $s8, -1 st.d $a0, $sp, 8 # 8-byte Folded Spill ori $a6, $zero, 24 @@ -186449,8 +186429,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR mul.d $t6, $t6, $a6 fldx.d $fa7, $t4, $t6 slli.d $a5, $a5, 3 - fmul.d $fa5, $fa5, $fa2 - fmadd.d $fa4, $fa4, $fa2, $fa5 + fmul.d $fa5, $fa5, $fa1 + fmadd.d $fa4, $fa4, $fa1, $fa5 fsub.d $fa5, $fa6, $fa7 fld.d $fa6, $t5, 0 fldx.d $fa7, $a1, $a5 @@ -186596,7 +186576,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR ld.d $a4, $sp, 168 # 8-byte Folded Reload move $s5, $a3 move $s6, $s8 - vori.b $vr7, $vr1, 0 + vori.b $vr7, $vr0, 0 .p2align 4, , 16 .LBB857_24: # %vector.body # Parent Loop BB857_6 Depth=1 @@ -186608,7 +186588,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $ra, $s6, 3 slli.d $a5, $s5, 3 xvldx $xr9, $a7, $a5 - xvfmul.d $xr8, $xr8, $xr0 + xvfmul.d $xr8, $xr8, $xr2 vadd.w $vr10, $vr7, $vr5 vpickve2gr.w $a5, $vr10, 3 vpickve2gr.w $t6, $vr10, 2 @@ -186622,7 +186602,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR fldx.d $ft3, $t4, $t7 fldx.d $ft4, $t4, $t6 fldx.d $ft5, $t4, $a5 - xvfmadd.d $xr8, $xr9, $xr0, $xr8 + xvfmadd.d $xr8, $xr9, $xr2, $xr8 xvinsve0.d $xr10, $xr11, 1 xvinsve0.d $xr10, $xr12, 2 xvinsve0.d $xr10, $xr13, 3 @@ -187505,8 +187485,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR 
bstrpick.d $a0, $s0, 31, 0 add.d $s7, $a2, $t6 add.d $ra, $a2, $t1 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 slli.d $a2, $a0, 3 addi.d $a2, $a2, 8 add.d $t1, $a1, $a2 @@ -187523,7 +187501,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR ld.d $a2, $sp, 304 # 8-byte Folded Reload alsl.d $a0, $a0, $a2, 2 st.d $a0, $sp, 184 # 8-byte Folded Spill - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 st.d $a5, $sp, 16 # 8-byte Folded Spill st.d $a4, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 32 # 8-byte Folded Spill @@ -187647,8 +187626,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $fp, $fp, 3 fldx.d $fa5, $t2, $fp slli.d $a5, $a5, 3 - fmul.d $fa3, $fa3, $fa1 - fmadd.d $fa2, $fa2, $fa1, $fa3 + fmul.d $fa3, $fa3, $fa0 + fmadd.d $fa2, $fa2, $fa0, $fa3 fsub.d $fa3, $fa4, $fa5 fld.d $fa4, $t5, 0 fldx.d $fa5, $a1, $a5 @@ -187796,8 +187775,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR xvldx $xr5, $t2, $s5 slli.d $s5, $a4, 3 xvldx $xr6, $t2, $s5 - xvfmul.d $xr4, $xr4, $xr0 - xvfmadd.d $xr3, $xr3, $xr0, $xr4 + xvfmul.d $xr4, $xr4, $xr1 + xvfmadd.d $xr3, $xr3, $xr1, $xr4 xvldx $xr4, $a1, $a5 xvfsub.d $xr5, $xr5, $xr6 xvbitrevi.d $xr5, $xr5, 63 @@ -188693,17 +188672,15 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR alsl.d $t2, $t4, $t2, 3 add.d $t2, $t2, $t3 pcalau12i $t3, %pc_hi20(.LCPI873_0) - vld $vr1, $t3, %pc_lo12(.LCPI873_0) + vld $vr0, $t3, %pc_lo12(.LCPI873_0) st.d $s2, $sp, 312 # 8-byte Folded Spill mul.d $t3, $t7, $s2 add.d $a3, $a3, $t3 st.d $a3, $sp, 32 # 8-byte Folded Spill add.d $t7, $a2, $t3 add.d $a3, $a2, $a6 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 add.w $a2, $s5, $a1 - vreplgr2vr.w $vr2, $a1 + vreplgr2vr.w $vr1, $a1 add.w $ra, $s1, $a1 add.w $t3, $s6, $a1 slli.d $a1, $a7, 3 @@ -188724,8 +188701,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a4, $sp, 200 # 8-byte Folded Spill alsl.d $a1, $a1, $t1, 2 st.d $a1, $sp, 168 # 8-byte Folded Spill - vadd.w $vr1, $vr2, $vr1 - vldi $vr2, -928 + vadd.w $vr0, $vr1, $vr0 + vldi $vr1, -928 + xvldi $xr2, -928 addi.d $a1, $a0, 8 st.d $a1, $sp, 192 # 8-byte Folded Spill ori $t4, $zero, 24 @@ -188825,8 +188803,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR move $s0, $a1 move $s4, $s3 move $s8, $s5 - move $a3, $t6 - move $a7, $t7 + move $a7, $t6 + move $a3, $t7 .p2align 4, , 16 .LBB873_8: # %scalar.ph # Parent Loop BB873_5 Depth=1 @@ -188843,14 +188821,14 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR add.w $t3, $a4, $s8 slli.d $t3, $t3, 3 fldx.d $fa6, $t0, $t3 - add.w $t3, $a4, $a3 + add.w $t3, $a4, $a7 mul.d $t3, $t3, $t4 fldx.d $fa7, $t5, $t3 - add.w $t3, $a4, $a7 + add.w $t3, $a4, $a3 mul.d $t3, $t3, $t4 fldx.d $ft0, $t5, $t3 - fmul.d $fa6, $fa6, $fa2 - fmadd.d $fa5, $fa5, $fa2, $fa6 + fmul.d $fa6, $fa6, $fa1 + fmadd.d $fa5, $fa5, $fa1, $fa6 fld.d $fa6, $t8, 0 fsub.d $fa7, $fa7, $ft0 addi.w $t3, $a4, 0 @@ -188858,8 +188836,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR fdiv.d $fa6, $fa7, $fa6 fmadd.d $fa4, $fa4, $fa5, $fa6 fstx.d $fa4, $a5, $a6 - addi.d $a7, $a7, 1 addi.d $a3, $a3, 1 + addi.d $a7, $a7, 1 addi.d $s8, $s8, 1 addi.d $s4, $s4, 1 addi.w $t2, $t2, -1 @@ -189018,7 +188996,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR move $s0, $ra move $s8, $s6 move $t2, $a2 - 
vori.b $vr8, $vr1, 0 + vori.b $vr8, $vr0, 0 .p2align 4, , 16 .LBB873_24: # %vector.body # Parent Loop BB873_5 Depth=1 @@ -189030,7 +189008,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR xvldx $xr10, $a5, $s4 slli.d $a3, $s0, 3 xvldx $xr11, $t0, $a3 - xvfmul.d $xr9, $xr9, $xr0 + xvfmul.d $xr9, $xr9, $xr2 vadd.w $vr12, $vr8, $vr6 vpickve2gr.w $a3, $vr12, 3 vpickve2gr.w $a6, $vr12, 2 @@ -189044,7 +189022,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR fldx.d $ft5, $t5, $a7 fldx.d $ft6, $t5, $a6 fldx.d $ft7, $t5, $a3 - xvfmadd.d $xr9, $xr11, $xr0, $xr9 + xvfmadd.d $xr9, $xr11, $xr2, $xr9 xvinsve0.d $xr12, $xr13, 1 xvinsve0.d $xr12, $xr14, 2 xvinsve0.d $xr12, $xr15, 3 @@ -197511,9 +197489,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR add.d $t3, $t6, $t3 add.d $t6, $a4, $a6 st.d $t6, $sp, 32 # 8-byte Folded Spill - vreplgr2vr.w $vr1, $a2 + vreplgr2vr.w $vr0, $a2 pcalau12i $a2, %pc_hi20(.LCPI940_0) - vld $vr2, $a2, %pc_lo12(.LCPI940_0) + vld $vr1, $a2, %pc_lo12(.LCPI940_0) mul.d $a2, $s1, $s6 add.d $a4, $a4, $a2 st.d $a4, $sp, 24 # 8-byte Folded Spill @@ -197521,8 +197499,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a0, $sp, 192 # 8-byte Folded Spill add.d $a4, $a2, $a3 add.d $s0, $a3, $a6 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr0, $a0 slli.d $a0, $t1, 3 addi.d $a0, $a0, 8 add.d $a2, $a1, $a0 @@ -197538,8 +197514,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a2, $sp, 176 # 8-byte Folded Spill alsl.d $a0, $a0, $t8, 2 st.d $a0, $sp, 160 # 8-byte Folded Spill - vadd.w $vr1, $vr1, $vr2 - vldi $vr2, -928 + vadd.w $vr0, $vr0, $vr1 + vldi $vr1, -928 + xvldi $xr2, -928 addi.d $a0, $ra, -1 st.d $a0, $sp, 16 # 8-byte Folded Spill ori $a6, $zero, 24 @@ -197651,8 +197628,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR mul.d $t7, $t7, $a6 fldx.d $ft0, $t4, $t7 slli.d $t6, $t6, 3 - fmul.d $fa6, $fa6, $fa2 - fmadd.d $fa5, $fa5, $fa2, $fa6 + fmul.d $fa6, $fa6, $fa1 + fmadd.d $fa5, $fa5, $fa1, $fa6 fsub.d $fa6, $fa7, $ft0 fld.d $fa7, $t5, 0 fldx.d $ft0, $a1, $t6 @@ -197794,7 +197771,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR ld.d $a4, $sp, 176 # 8-byte Folded Reload move $a2, $t1 move $s6, $s8 - vori.b $vr8, $vr1, 0 + vori.b $vr8, $vr0, 0 .p2align 4, , 16 .LBB940_24: # %vector.body # Parent Loop BB940_6 Depth=1 @@ -197806,7 +197783,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $ra, $s6, 3 slli.d $a0, $a2, 3 xvldx $xr10, $a7, $a0 - xvfmul.d $xr9, $xr9, $xr0 + xvfmul.d $xr9, $xr9, $xr2 vadd.w $vr11, $vr8, $vr6 vpickve2gr.w $a0, $vr11, 3 vpickve2gr.w $t6, $vr11, 2 @@ -197820,7 +197797,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR fldx.d $ft4, $t4, $t7 fldx.d $ft5, $t4, $t6 fldx.d $ft6, $t4, $a0 - xvfmadd.d $xr9, $xr10, $xr0, $xr9 + xvfmadd.d $xr9, $xr10, $xr2, $xr9 xvinsve0.d $xr11, $xr12, 1 xvinsve0.d $xr11, $xr13, 2 xvinsve0.d $xr11, $xr14, 3 @@ -198714,7 +198691,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR alsl.d $t5, $t5, $s5, 3 add.d $t4, $t5, $t4 pcalau12i $t5, %pc_hi20(.LCPI948_0) - vld $vr1, $t5, %pc_lo12(.LCPI948_0) + vld $vr0, $t5, %pc_lo12(.LCPI948_0) mul.d $t5, $s6, $s4 add.d $fp, $fp, $t5 st.d $fp, $sp, 48 # 8-byte Folded Spill @@ -198725,9 +198702,7 @@ 
_ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR add.w $s0, $t6, $t7 add.w $t6, $t6, $t5 add.d $t5, $t1, $t5 - add.d $t7, $t1, $t7 - lu52i.d $t1, $zero, 1022 - xvreplgr2vr.d $xr0, $t1 + add.d $t1, $t1, $t7 ld.d $s4, $a1, 72 ld.d $s5, $a4, 72 ld.d $a0, $a0, 280 @@ -198738,8 +198713,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a1, $sp, 288 # 8-byte Folded Spill add.w $a1, $a2, $s6 st.d $a1, $sp, 24 # 8-byte Folded Spill - vreplgr2vr.w $vr2, $a5 - add.w $a1, $a7, $a5 + vreplgr2vr.w $vr1, $a5 + add.w $t7, $a7, $a5 addi.d $a2, $a0, 144 addi.d $a0, $a0, 152 st.d $a0, $sp, 208 # 8-byte Folded Spill @@ -198758,15 +198733,15 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a3, $sp, 192 # 8-byte Folded Spill alsl.d $a0, $a0, $s3, 2 st.d $a0, $sp, 176 # 8-byte Folded Spill - vadd.w $vr1, $vr2, $vr1 - vldi $vr2, -928 + vadd.w $vr0, $vr1, $vr0 + vldi $vr1, -928 + xvldi $xr2, -928 ori $a3, $zero, 24 st.d $s0, $sp, 40 # 8-byte Folded Spill move $a5, $s0 st.d $t6, $sp, 32 # 8-byte Folded Spill move $t0, $t6 - st.d $a1, $sp, 16 # 8-byte Folded Spill - move $s0, $a1 + st.d $t7, $sp, 16 # 8-byte Folded Spill st.d $s8, $sp, 80 # 8-byte Folded Spill st.d $ra, $sp, 72 # 8-byte Folded Spill st.d $s2, $sp, 64 # 8-byte Folded Spill @@ -198777,8 +198752,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR ld.d $s7, $sp, 160 # 8-byte Folded Reload addi.d $s7, $s7, 1 ld.d $s8, $sp, 80 # 8-byte Folded Reload - ld.d $s0, $sp, 96 # 8-byte Folded Reload - add.w $s0, $s0, $s8 + ld.d $t7, $sp, 96 # 8-byte Folded Reload + add.w $t7, $t7, $s8 ld.d $ra, $sp, 72 # 8-byte Folded Reload ld.d $t0, $sp, 104 # 8-byte Folded Reload add.w $t0, $t0, $ra @@ -198791,8 +198766,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR add.d $t8, $t8, $s2 ld.d $t5, $sp, 128 # 8-byte Folded Reload add.d $t5, $t5, $ra - ld.d $t7, $sp, 120 # 8-byte Folded Reload - add.d $t7, $t7, $ra + ld.d $t1, $sp, 120 # 8-byte Folded Reload + add.d $t1, $t1, $ra ld.d $a7, $sp, 152 # 8-byte Folded Reload add.d $a7, $a7, $s8 ld.d $s6, $sp, 168 # 8-byte Folded Reload @@ -198830,8 +198805,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR vreplgr2vr.w $vr3, $a0 vreplgr2vr.w $vr4, $a1 st.d $a7, $sp, 152 # 8-byte Folded Spill - st.d $t7, $sp, 120 # 8-byte Folded Spill - move $t1, $t7 + st.d $t1, $sp, 120 # 8-byte Folded Spill st.d $t5, $sp, 128 # 8-byte Folded Spill move $a0, $t5 st.d $t8, $sp, 136 # 8-byte Folded Spill @@ -198841,8 +198815,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a5, $sp, 112 # 8-byte Folded Spill st.d $t0, $sp, 104 # 8-byte Folded Spill move $fp, $t0 - st.d $s0, $sp, 96 # 8-byte Folded Spill - move $t8, $s0 + st.d $t7, $sp, 96 # 8-byte Folded Spill + move $t8, $t7 ld.d $s1, $sp, 88 # 8-byte Folded Reload .p2align 4, , 16 .LBB948_7: # %.preheader @@ -198880,8 +198854,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR mul.d $t0, $t0, $a3 fldx.d $ft0, $t2, $t0 slli.d $a1, $a1, 3 - fmul.d $fa6, $fa6, $fa2 - fmadd.d $fa5, $fa5, $fa2, $fa6 + fmul.d $fa6, $fa6, $fa1 + fmadd.d $fa5, $fa5, $fa1, $fa6 fsub.d $fa6, $fa7, $ft0 fld.d $fa7, $a2, 0 fldx.d $ft0, $s4, $a1 @@ -199032,7 +199006,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR move $ra, $a5 move $s0, $fp move $s8, $t8 - vori.b $vr8, $vr1, 0 + vori.b $vr8, $vr0, 0 
.p2align 4, , 16 .LBB948_24: # %vector.body # Parent Loop BB948_6 Depth=1 @@ -199043,7 +199017,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $s2, $s8, 3 slli.d $a1, $ra, 3 xvldx $xr10, $s5, $a1 - xvfmul.d $xr9, $xr9, $xr0 + xvfmul.d $xr9, $xr9, $xr2 vadd.w $vr11, $vr8, $vr6 vpickve2gr.w $a1, $vr11, 3 vpickve2gr.w $t0, $vr11, 2 @@ -199057,7 +199031,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR fldx.d $ft4, $t2, $t5 fldx.d $ft5, $t2, $t0 fldx.d $ft6, $t2, $a1 - xvfmadd.d $xr9, $xr10, $xr0, $xr9 + xvfmadd.d $xr9, $xr10, $xr2, $xr9 xvinsve0.d $xr11, $xr12, 1 xvinsve0.d $xr11, $xr13, 2 xvinsve0.d $xr11, $xr14, 3 @@ -199939,8 +199913,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR st.d $a0, $sp, 216 # 8-byte Folded Spill move $s8, $t1 bstrpick.d $a0, $t1, 31, 0 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 slli.d $a2, $a0, 3 addi.d $a2, $a2, 8 add.d $t1, $a1, $a2 @@ -199957,7 +199929,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR ld.d $a2, $sp, 304 # 8-byte Folded Reload alsl.d $a0, $a0, $a2, 2 st.d $a0, $sp, 184 # 8-byte Folded Spill - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 st.d $a4, $sp, 16 # 8-byte Folded Spill st.d $a6, $sp, 8 # 8-byte Folded Spill st.d $s7, $sp, 32 # 8-byte Folded Spill @@ -200077,8 +200050,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR slli.d $fp, $fp, 3 fldx.d $fa5, $t2, $fp slli.d $a5, $a5, 3 - fmul.d $fa3, $fa3, $fa1 - fmadd.d $fa2, $fa2, $fa1, $fa3 + fmul.d $fa3, $fa3, $fa0 + fmadd.d $fa2, $fa2, $fa0, $fa3 fsub.d $fa3, $fa4, $fa5 fld.d $fa4, $t5, 0 fldx.d $fa5, $a1, $a5 @@ -200226,8 +200199,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg3I5FieldI22UniformR xvldx $xr5, $t2, $ra slli.d $ra, $a4, 3 xvldx $xr6, $t2, $ra - xvfmul.d $xr4, $xr4, $xr0 - xvfmadd.d $xr3, $xr3, $xr0, $xr4 + xvfmul.d $xr4, $xr4, $xr1 + xvfmadd.d $xr3, $xr3, $xr1, $xr4 xvldx $xr4, $a1, $a5 xvfsub.d $xr5, $xr5, $xr6 xvbitrevi.d $xr5, $xr5, 63 @@ -215517,8 +215490,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR ld.d $a0, $a7, 360 ld.w $t6, $a4, 28 ld.d $t0, $t0, 8 - st.d $a5, $sp, 584 # 8-byte Folded Spill - ld.w $a5, $a4, 32 + ld.w $t3, $a4, 32 mul.d $a0, $a0, $a6 ld.d $t1, $a7, 800 add.d $t0, $t0, $a0 @@ -215534,10 +215506,12 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR ld.w $s3, $s6, 28 ld.w $s2, $s6, 32 mul.d $a0, $a0, $a6 - add.d $t3, $t2, $a0 - ld.w $s5, $t3, 28 + st.d $a1, $sp, 584 # 8-byte Folded Spill + move $a1, $a3 + add.d $a3, $t2, $a0 + ld.w $s5, $a3, 28 ld.d $a0, $a7, 512 - ld.w $t1, $t3, 32 + ld.w $t1, $a3, 32 ld.d $t2, $a7, 656 ld.d $t4, $a7, 504 ld.d $a0, $a0, 8 @@ -215547,8 +215521,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR add.d $t4, $a0, $t4 mul.d $a0, $t5, $a6 add.d $t5, $t2, $a0 - st.d $a5, $sp, 240 # 8-byte Folded Spill - mul.d $a0, $t8, $a5 + st.d $t3, $sp, 240 # 8-byte Folded Spill + mul.d $a0, $t8, $t3 ld.d $fp, $sp, 104 # 8-byte Folded Reload st.d $t6, $sp, 504 # 8-byte Folded Spill mul.d $a6, $fp, $t6 @@ -215567,8 +215541,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR mul.d $a0, $fp, $s3 add.d $a6, $a2, $t6 add.d $t6, $a6, $t2 - add.d $a5, $t7, $a0 - st.d $a5, $sp, 264 # 8-byte Folded Spill + add.d $a6, $t7, $a0 + st.d $a6, $sp, 264 # 8-byte Folded Spill add.d $a0, $a2, $a0 
add.d $t7, $a0, $t7 st.d $t1, $sp, 216 # 8-byte Folded Spill @@ -215586,40 +215560,36 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR mul.d $a0, $t8, $s4 st.d $a6, $sp, 472 # 8-byte Folded Spill mul.d $a6, $fp, $a6 - move $s5, $a1 - move $a1, $a3 - add.d $a3, $a0, $a6 + add.d $t3, $a0, $a6 add.d $a6, $a2, $a6 add.d $a0, $a6, $a0 st.d $a0, $sp, 248 # 8-byte Folded Spill mul.d $a0, $t8, $s3 st.d $t2, $sp, 464 # 8-byte Folded Spill mul.d $a6, $fp, $t2 + move $s5, $a5 add.d $a5, $a0, $a6 add.d $a6, $a2, $a6 add.d $s1, $a6, $a0 - lu52i.d $a0, $zero, 1022 - xvreplgr2vr.d $xr0, $a0 ld.d $a0, $s7, 8 ld.d $a6, $a4, 72 ld.d $t2, $t0, 72 ld.d $a4, $a7, 280 move $s7, $s3 ld.d $s6, $s6, 72 - ld.d $t0, $t3, 72 - move $t3, $a3 + ld.d $t0, $a3, 72 ld.d $t4, $t4, 72 ld.d $s3, $t5, 72 move $t5, $t7 add.w $a3, $a1, $fp st.d $a3, $sp, 456 # 8-byte Folded Spill move $fp, $a5 - ld.d $a1, $sp, 584 # 8-byte Folded Reload - add.w $a1, $a1, $t8 + add.w $a1, $s5, $t8 st.d $a1, $sp, 72 # 8-byte Folded Spill - add.d $a1, $s5, $a2 + ld.d $a3, $sp, 584 # 8-byte Folded Reload + add.d $a1, $a3, $a2 st.d $a1, $sp, 448 # 8-byte Folded Spill - addi.w $a1, $s5, -1 + addi.w $a1, $a3, -1 ld.d $a3, $sp, 264 # 8-byte Folded Reload ld.d $a5, $sp, 272 # 8-byte Folded Reload add.d $t7, $s8, $a2 @@ -215662,7 +215632,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR alsl.d $a1, $t1, $a2, 2 st.d $a1, $sp, 280 # 8-byte Folded Spill ld.d $a2, $sp, 256 # 8-byte Folded Reload - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 st.d $a7, $sp, 56 # 8-byte Folded Spill addi.d $a7, $a7, -1 st.d $s5, $sp, 40 # 8-byte Folded Spill @@ -215840,14 +215811,14 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR fsub.d $fa2, $fa4, $fa2 slli.d $s2, $s2, 3 fldx.d $fa4, $t0, $s2 - fmul.d $fa5, $fa5, $fa1 + fmul.d $fa5, $fa5, $fa0 add.w $s2, $a4, $a1 slli.d $s8, $s2, 3 fldx.d $fa6, $t4, $s8 addi.w $s2, $s2, -1 slli.d $s2, $s2, 3 fldx.d $fa7, $t4, $s2 - fmadd.d $fa4, $fa4, $fa1, $fa5 + fmadd.d $fa4, $fa4, $fa0, $fa5 fldx.d $fa5, $a6, $t3 fdiv.d $fa2, $fa2, $fa4 fsub.d $fa4, $fa6, $fa7 @@ -216134,8 +216105,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR xvldx $xr7, $t0, $t6 xvfdiv.d $xr4, $xr4, $xr3 xvfsub.d $xr4, $xr5, $xr4 - xvfmul.d $xr5, $xr6, $xr0 - xvfmadd.d $xr5, $xr7, $xr0, $xr5 + xvfmul.d $xr5, $xr6, $xr1 + xvfmadd.d $xr5, $xr7, $xr1, $xr5 add.w $t6, $a3, $a1 slli.d $t6, $t6, 3 xvldx $xr6, $t4, $t6 @@ -217663,7 +217634,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR add.d $a5, $a5, $a6 ld.w $t6, $a5, 28 ld.d $a6, $a3, 368 - st.d $a1, $sp, 632 # 8-byte Folded Spill + st.d $a1, $sp, 640 # 8-byte Folded Spill ld.w $a1, $a5, 32 ld.d $a7, $a3, 800 ld.d $t1, $a3, 360 @@ -217673,13 +217644,13 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR mul.d $a7, $t1, $t0 add.d $a7, $a6, $a7 mul.d $a6, $t2, $t0 - add.d $t1, $t3, $a6 - st.d $t1, $sp, 640 # 8-byte Folded Spill - ld.w $t7, $t1, 28 + move $s8, $a0 + add.d $a0, $t3, $a6 + ld.w $t7, $a0, 28 ld.d $a6, $a3, 224 ld.d $t2, $a3, 216 st.d $a2, $sp, 648 # 8-byte Folded Spill - ld.w $a2, $t1, 32 + ld.w $a2, $a0, 32 ld.d $t3, $a3, 512 ld.d $a6, $a6, 8 mul.d $t2, $t2, $t0 @@ -217738,7 +217709,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR add.d $a2, $t0, $t7 st.d $a2, $sp, 256 # 8-byte Folded Spill ld.w $t7, $t2, 28 - add.d $s8, $t0, $t6 + add.d $s3, $t0, $t6 
mul.d $t0, $t8, $a1 add.d $t4, $a4, $t0 mul.d $t6, $fp, $t7 @@ -217763,13 +217734,11 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR add.d $a1, $t0, $a6 add.d $s1, $t0, $t6 move $t6, $s7 - lu52i.d $a6, $zero, 1022 - xvreplgr2vr.d $xr0, $a6 - ld.d $s7, $a0, 8 + ld.d $s7, $s8, 8 ld.d $a6, $a5, 72 ld.d $t0, $a7, 72 + move $s8, $s3 ld.d $a5, $a3, 280 - ld.d $a0, $sp, 640 # 8-byte Folded Reload ld.d $t4, $a0, 72 ld.d $t2, $t2, 72 ld.d $s3, $t3, 72 @@ -217782,7 +217751,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR add.w $a0, $s2, $t8 st.d $a0, $sp, 40 # 8-byte Folded Spill ld.d $a2, $sp, 272 # 8-byte Folded Reload - ld.d $a1, $sp, 632 # 8-byte Folded Reload + ld.d $a1, $sp, 640 # 8-byte Folded Reload add.d $a0, $a1, $a4 st.d $a0, $sp, 472 # 8-byte Folded Spill addi.w $a0, $a1, -1 @@ -217823,7 +217792,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR alsl.d $a0, $a4, $a1, 2 st.d $a0, $sp, 280 # 8-byte Folded Spill ld.d $a1, $sp, 264 # 8-byte Folded Reload - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 addi.d $a0, $s7, 8 st.d $a0, $sp, 352 # 8-byte Folded Spill ld.d $s2, $sp, 104 # 8-byte Folded Reload @@ -218023,14 +217993,14 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR add.w $t3, $s2, $a0 slli.d $t3, $t3, 3 fldx.d $fa5, $t2, $t3 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa0 add.w $t3, $s2, $s8 slli.d $t3, $t3, 3 fldx.d $fa6, $s3, $t3 add.w $t3, $s2, $a4 slli.d $t3, $t3, 3 fldx.d $fa7, $s3, $t3 - fmadd.d $fa3, $fa5, $fa1, $fa3 + fmadd.d $fa3, $fa5, $fa0, $fa3 fldx.d $fa5, $a6, $a2 fdiv.d $fa2, $fa2, $fa3 fsub.d $fa3, $fa6, $fa7 @@ -218332,8 +218302,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg6I5FieldI22UniformR xvldx $xr7, $t2, $a4 xvfdiv.d $xr4, $xr4, $xr3 xvfsub.d $xr4, $xr5, $xr4 - xvfmul.d $xr5, $xr6, $xr0 - xvfmadd.d $xr5, $xr7, $xr0, $xr5 + xvfmul.d $xr5, $xr6, $xr1 + xvfmadd.d $xr5, $xr7, $xr1, $xr5 add.w $a4, $t3, $a0 slli.d $a4, $a4, 3 xvldx $xr6, $s3, $a4 @@ -222152,9 +222122,7 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR mul.d $t4, $t7, $s2 add.d $s2, $t3, $t4 add.d $t4, $a1, $t4 - add.d $t4, $t4, $t3 - lu52i.d $t3, $zero, 1022 - xvreplgr2vr.d $xr0, $t3 + add.d $t3, $t4, $t3 ld.d $a0, $a0, 8 ld.d $s7, $a4, 72 ld.d $s8, $a6, 72 @@ -222174,17 +222142,17 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR st.d $a2, $sp, 416 # 8-byte Folded Spill addi.w $a2, $a5, -1 ld.d $a7, $sp, 256 # 8-byte Folded Reload - add.d $t3, $t2, $a1 - st.d $t3, $sp, 48 # 8-byte Folded Spill + add.d $t4, $t2, $a1 + st.d $t4, $sp, 48 # 8-byte Folded Spill add.d $a5, $fp, $a1 add.d $t1, $a7, $a1 add.d $t5, $s2, $a1 st.d $t5, $sp, 24 # 8-byte Folded Spill add.d $t0, $s3, $a1 - add.d $t3, $s0, $a1 - st.d $t3, $sp, 88 # 8-byte Folded Spill - add.d $t3, $s5, $a1 - st.d $t3, $sp, 80 # 8-byte Folded Spill + add.d $t4, $s0, $a1 + st.d $t4, $sp, 88 # 8-byte Folded Spill + add.d $t4, $s5, $a1 + st.d $t4, $sp, 80 # 8-byte Folded Spill add.d $t7, $s1, $a1 st.d $a2, $sp, 552 # 8-byte Folded Spill bstrpick.d $a1, $a2, 31, 0 @@ -222194,14 +222162,14 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR addi.d $a4, $a4, 152 st.d $a4, $sp, 296 # 8-byte Folded Spill ld.d $a4, $sp, 248 # 8-byte Folded Reload - add.d $t3, $s7, $a2 - st.d $t3, $sp, 320 # 8-byte Folded Spill - add.d $t3, $s8, $a2 - st.d $t3, $sp, 360 # 8-byte Folded Spill - add.d $t3, 
$ra, $a2 - st.d $t3, $sp, 288 # 8-byte Folded Spill - add.d $t3, $a3, $a2 - st.d $t3, $sp, 352 # 8-byte Folded Spill + add.d $t4, $s7, $a2 + st.d $t4, $sp, 320 # 8-byte Folded Spill + add.d $t4, $s8, $a2 + st.d $t4, $sp, 360 # 8-byte Folded Spill + add.d $t4, $ra, $a2 + st.d $t4, $sp, 288 # 8-byte Folded Spill + add.d $t4, $a3, $a2 + st.d $t4, $sp, 352 # 8-byte Folded Spill add.d $a2, $a6, $a2 st.d $a2, $sp, 336 # 8-byte Folded Spill addi.d $a1, $a1, 1 @@ -222212,10 +222180,11 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR ld.d $a2, $sp, 560 # 8-byte Folded Reload alsl.d $a1, $a1, $a2, 2 st.d $a1, $sp, 272 # 8-byte Folded Spill - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 addi.d $a1, $a0, 8 st.d $a1, $sp, 304 # 8-byte Folded Spill - ld.d $t3, $sp, 80 # 8-byte Folded Reload + ld.d $t4, $sp, 80 # 8-byte Folded Reload st.d $t7, $sp, 8 # 8-byte Folded Spill st.d $t0, $sp, 16 # 8-byte Folded Spill move $a2, $t0 @@ -222241,8 +222210,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR ld.d $a5, $sp, 112 # 8-byte Folded Reload add.d $a5, $a5, $t2 ld.d $a7, $sp, 224 # 8-byte Folded Reload - ld.d $t4, $sp, 152 # 8-byte Folded Reload - add.d $t4, $t4, $a7 + ld.d $t3, $sp, 152 # 8-byte Folded Reload + add.d $t3, $t3, $a7 ld.d $s4, $sp, 72 # 8-byte Folded Reload ld.d $t0, $sp, 120 # 8-byte Folded Reload add.d $t0, $t0, $s4 @@ -222251,8 +222220,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR ld.d $s6, $sp, 64 # 8-byte Folded Reload ld.d $t7, $sp, 136 # 8-byte Folded Reload add.d $t7, $t7, $s6 - ld.d $t3, $sp, 144 # 8-byte Folded Reload - add.d $t3, $t3, $s6 + ld.d $t4, $sp, 144 # 8-byte Folded Reload + add.d $t4, $t4, $s6 ld.d $s1, $sp, 176 # 8-byte Folded Reload add.d $s1, $s1, $s6 ld.d $s5, $sp, 184 # 8-byte Folded Reload @@ -222328,15 +222297,15 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR st.d $s5, $sp, 488 # 8-byte Folded Spill st.d $s1, $sp, 176 # 8-byte Folded Spill st.d $s1, $sp, 480 # 8-byte Folded Spill - st.d $t3, $sp, 144 # 8-byte Folded Spill - move $t8, $t3 + st.d $t4, $sp, 144 # 8-byte Folded Spill + move $t8, $t4 st.d $t7, $sp, 136 # 8-byte Folded Spill move $s5, $t7 st.d $t6, $sp, 128 # 8-byte Folded Spill move $a2, $t6 st.d $t0, $sp, 120 # 8-byte Folded Spill - st.d $t4, $sp, 152 # 8-byte Folded Spill - move $s2, $t4 + st.d $t3, $sp, 152 # 8-byte Folded Spill + move $s2, $t3 st.d $a5, $sp, 112 # 8-byte Folded Spill st.d $t1, $sp, 104 # 8-byte Folded Spill st.d $a4, $sp, 248 # 8-byte Folded Spill @@ -222391,8 +222360,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR fldx.d $fa6, $a3, $s6 fdiv.d $fa2, $fa2, $fa3 fsub.d $fa2, $fa4, $fa2 - fmul.d $fa4, $fa5, $fa1 - fmadd.d $fa4, $fa6, $fa1, $fa4 + fmul.d $fa4, $fa5, $fa0 + fmadd.d $fa4, $fa6, $fa0, $fa4 add.w $s6, $a4, $s1 slli.d $s6, $s6, 3 fldx.d $fa5, $a6, $s6 @@ -222664,8 +222633,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg5I5FieldI22UniformR xvldx $xr7, $a3, $t3 xvfdiv.d $xr4, $xr4, $xr3 xvfsub.d $xr4, $xr5, $xr4 - xvfmul.d $xr5, $xr6, $xr0 - xvfmadd.d $xr5, $xr7, $xr0, $xr5 + xvfmul.d $xr5, $xr6, $xr1 + xvfmadd.d $xr5, $xr7, $xr1, $xr5 add.w $t3, $t8, $a1 slli.d $t3, $t3, 3 xvldx $xr6, $a6, $t3 @@ -224739,8 +224708,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR addi.d $a1, $a1, 8 add.d $t2, $a4, $t2 add.d $a4, $a4, $t1 - lu52i.d $a2, $zero, 1022 - xvreplgr2vr.d $xr0, $a2 add.d $a2, 
$t3, $a1 st.d $a2, $sp, 240 # 8-byte Folded Spill add.d $a2, $a5, $a1 @@ -224754,7 +224721,8 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR st.d $a2, $sp, 232 # 8-byte Folded Spill alsl.d $a1, $a1, $ra, 2 st.d $a1, $sp, 200 # 8-byte Folded Spill - vldi $vr1, -928 + vldi $vr0, -928 + xvldi $xr1, -928 addi.d $a1, $s3, -1 addi.d $s6, $s7, -1 addi.d $a3, $a0, 16 @@ -224889,15 +224857,15 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR fldx.d $fa5, $a5, $t4 fadd.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa4, $fa5 - fmul.d $fa3, $fa3, $fa1 - fmadd.d $fa2, $fa2, $fa1, $fa3 + fmul.d $fa3, $fa3, $fa0 + fmadd.d $fa2, $fa2, $fa0, $fa3 add.w $t4, $a4, $s0 slli.d $t4, $t4, 3 fldx.d $fa3, $t0, $t4 fld.d $fa4, $a0, 0 fld.d $fa5, $a0, 8 addi.w $t4, $a4, 0 - fmul.d $fa2, $fa2, $fa1 + fmul.d $fa2, $fa2, $fa0 fmul.d $fa2, $fa2, $fa4 fmadd.d $fa2, $fa3, $fa5, $fa2 add.w $fp, $a4, $a1 @@ -225070,12 +225038,12 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR xvldx $xr7, $a5, $a3 xvfadd.d $xr4, $xr4, $xr5 xvfadd.d $xr5, $xr6, $xr7 - xvfmul.d $xr5, $xr5, $xr0 + xvfmul.d $xr5, $xr5, $xr1 add.w $a3, $s2, $a1 slli.d $a3, $a3, 3 xvldx $xr6, $t0, $a3 - xvfmadd.d $xr4, $xr4, $xr0, $xr5 - xvfmul.d $xr4, $xr4, $xr0 + xvfmadd.d $xr4, $xr4, $xr1, $xr5 + xvfmul.d $xr4, $xr4, $xr1 xvfmul.d $xr4, $xr4, $xr3 xvfmadd.d $xr4, $xr6, $xr2, $xr4 add.w $a3, $t8, $a1 @@ -226023,10 +225991,6 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR add.d $ra, $a1, $a4 add.d $s7, $a4, $t5 add.d $a4, $a4, $t2 - lu52i.d $a1, $zero, 1022 - xvreplgr2vr.d $xr0, $a1 - lu52i.d $a1, $zero, -1026 - xvreplgr2vr.d $xr1, $a1 slli.d $a1, $a2, 3 addi.d $a1, $a1, 8 add.d $a7, $t3, $a1 @@ -226042,8 +226006,10 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR st.d $a2, $sp, 216 # 8-byte Folded Spill alsl.d $a1, $a1, $s0, 2 st.d $a1, $sp, 184 # 8-byte Folded Spill - vldi $vr2, -928 - vldi $vr3, -800 + vldi $vr0, -928 + vldi $vr1, -800 + xvldi $xr2, -928 + xvldi $xr3, -800 st.d $s2, $sp, 40 # 8-byte Folded Spill addi.d $a1, $s2, 1 st.d $a1, $sp, 16 # 8-byte Folded Spill @@ -226170,15 +226136,15 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR fldx.d $fa7, $a5, $t1 fadd.d $fa4, $fa4, $fa5 fadd.d $fa5, $fa6, $fa7 - fmul.d $fa5, $fa5, $fa2 - fmadd.d $fa4, $fa4, $fa2, $fa5 + fmul.d $fa5, $fa5, $fa0 + fmadd.d $fa4, $fa4, $fa0, $fa5 add.w $t1, $a4, $a2 slli.d $t1, $t1, 3 fldx.d $fa5, $t0, $t1 fld.d $fa6, $a0, 8 fld.d $fa7, $a0, 0 addi.w $t1, $a4, 0 - fmul.d $fa4, $fa4, $fa3 + fmul.d $fa4, $fa4, $fa1 fmul.d $fa5, $fa5, $fa6 fmadd.d $fa4, $fa4, $fa7, $fa5 add.w $s1, $a4, $a1 @@ -226344,9 +226310,9 @@ _ZN18LoopApplyEvaluator8evaluateI16ApplyMultiArgLocI9MultiArg2I5FieldI22UniformR add.w $a4, $s3, $a1 slli.d $a4, $a4, 3 xvldx $xr8, $t0, $a4 - xvfmul.d $xr7, $xr7, $xr0 - xvfmadd.d $xr6, $xr6, $xr0, $xr7 - xvfmul.d $xr6, $xr6, $xr1 + xvfmul.d $xr7, $xr7, $xr2 + xvfmadd.d $xr6, $xr6, $xr2, $xr7 + xvfmul.d $xr6, $xr6, $xr3 xvfmul.d $xr7, $xr8, $xr5 xvfmadd.d $xr6, $xr6, $xr4, $xr7 add.w $a4, $a3, $a1 diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s index 632f991d..0741849a 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s +++ 
b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s @@ -80259,25 +80259,25 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_startproc # %bb.0: - addi.d $sp, $sp, -208 - .cfi_def_cfa_offset 208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill - fst.d $fs0, $sp, 112 # 8-byte Folded Spill - fst.d $fs1, $sp, 104 # 8-byte Folded Spill - fst.d $fs2, $sp, 96 # 8-byte Folded Spill - fst.d $fs3, $sp, 88 # 8-byte Folded Spill - fst.d $fs4, $sp, 80 # 8-byte Folded Spill - fst.d $fs5, $sp, 72 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + .cfi_def_cfa_offset 192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + fst.d $fs0, $sp, 96 # 8-byte Folded Spill + fst.d $fs1, $sp, 88 # 8-byte Folded Spill + fst.d $fs2, $sp, 80 # 8-byte Folded Spill + fst.d $fs3, $sp, 72 # 8-byte Folded Spill + fst.d $fs4, $sp, 64 # 8-byte Folded Spill + fst.d $fs5, $sp, 56 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -80423,26 +80423,22 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa lu32i.d $a2, -516992 lu52i.d $a2, $a2, 1036 vreplgr2vr.d $vr3, $a2 - ori $a2, $zero, 0 - lu32i.d $a2, -393216 - lu52i.d $a2, $a2, -1022 - vreplgr2vr.d $vr4, $a2 lu32i.d $a0, -268678 + lu52i.d $a0, $a0, 1042 pcalau12i $a2, %pc_hi20(.LCPI179_2) fld.d $fs0, $a2, %pc_lo12(.LCPI179_2) pcalau12i $a2, %pc_hi20(.LCPI179_4) fld.d $fs1, $a2, %pc_lo12(.LCPI179_4) pcalau12i $a2, %pc_hi20(.LCPI179_3) fld.d $fs2, $a2, %pc_lo12(.LCPI179_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr5, $a0 + vreplgr2vr.d $vr4, $a0 + vldi $vr5, -854 movgr2fr.d $fs3, $zero pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s5, $zero - vst $vr3, $sp, 48 # 16-byte Folded Spill - vst $vr4, $sp, 32 # 16-byte Folded Spill - vst $vr5, $sp, 16 # 16-byte Folded Spill + vst $vr3, $sp, 32 # 16-byte Folded Spill + vst $vr4, $sp, 16 # 16-byte Folded Spill b .LBB179_13 .p2align 4, , 16 .LBB179_12: # %_Z9check_sumIdEvT_.exit.us31 @@ -80455,11 +80451,11 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fld.d $fa1, $fp, %pc_lo12(init_value) vextrins.d $vr0, $vr1, 16 vfadd.d $vr0, $vr0, $vr3 - vfmadd.d $vr0, $vr0, $vr5, $vr4 + vfmadd.d $vr0, $vr0, $vr4, $vr5 vfadd.d $vr0, $vr0, $vr3 - vfmadd.d $vr0, $vr0, $vr5, $vr4 + vfmadd.d $vr0, $vr0, $vr4, $vr5 vfadd.d $vr0, $vr0, $vr3 - vfmadd.d $vr0, $vr0, $vr5, $vr4 + vfmadd.d $vr0, $vr0, $vr4, $vr5 vreplvei.d $vr1, $vr0, 0 fadd.d $fa1, $fa1, $fs3 vreplvei.d $vr0, $vr0, 1 @@ -80477,9 +80473,9 @@ 
_Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - vld $vr5, $sp, 16 # 16-byte Folded Reload - vld $vr4, $sp, 32 # 16-byte Folded Reload - vld $vr3, $sp, 48 # 16-byte Folded Reload + vldi $vr5, -854 + vld $vr4, $sp, 16 # 16-byte Folded Reload + vld $vr3, $sp, 32 # 16-byte Folded Reload ld.w $a1, $s4, %pc_lo12(iterations) b .LBB179_12 .LBB179_15: # %.preheader17.preheader @@ -80566,24 +80562,24 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa st.d $a2, $a1, 8 addi.d $a0, $a0, 1 st.w $a0, $s2, %pc_lo12(current_test) - fld.d $fs5, $sp, 72 # 8-byte Folded Reload - fld.d $fs4, $sp, 80 # 8-byte Folded Reload - fld.d $fs3, $sp, 88 # 8-byte Folded Reload - fld.d $fs2, $sp, 96 # 8-byte Folded Reload - fld.d $fs1, $sp, 104 # 8-byte Folded Reload - fld.d $fs0, $sp, 112 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s5, $sp, 144 # 8-byte Folded Reload - ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s3, $sp, 160 # 8-byte Folded Reload - ld.d $s2, $sp, 168 # 8-byte Folded Reload - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $s0, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $ra, $sp, 200 # 8-byte Folded Reload - addi.d $sp, $sp, 208 + fld.d $fs5, $sp, 56 # 8-byte Folded Reload + fld.d $fs4, $sp, 64 # 8-byte Folded Reload + fld.d $fs3, $sp, 72 # 8-byte Folded Reload + fld.d $fs2, $sp, 80 # 8-byte Folded Reload + fld.d $fs1, $sp, 88 # 8-byte Folded Reload + fld.d $fs0, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 ret .LBB179_24: ld.w $a1, $s1, %pc_lo12(allocated_results) @@ -95719,25 +95715,25 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_startproc # %bb.0: - addi.d $sp, $sp, -208 - .cfi_def_cfa_offset 208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill - fst.d $fs0, $sp, 112 # 8-byte Folded Spill - fst.d $fs1, $sp, 104 # 8-byte Folded Spill - fst.d $fs2, $sp, 96 # 8-byte Folded Spill - fst.d $fs3, $sp, 88 # 8-byte Folded Spill - fst.d $fs4, $sp, 80 # 8-byte Folded Spill - fst.d $fs5, $sp, 72 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + .cfi_def_cfa_offset 192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill 
+ st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + fst.d $fs0, $sp, 96 # 8-byte Folded Spill + fst.d $fs1, $sp, 88 # 8-byte Folded Spill + fst.d $fs2, $sp, 80 # 8-byte Folded Spill + fst.d $fs3, $sp, 72 # 8-byte Folded Spill + fst.d $fs4, $sp, 64 # 8-byte Folded Spill + fst.d $fs5, $sp, 56 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -95883,26 +95879,22 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol lu32i.d $a2, -516992 lu52i.d $a2, $a2, 1036 vreplgr2vr.d $vr3, $a2 - ori $a2, $zero, 0 - lu32i.d $a2, -393216 - lu52i.d $a2, $a2, -1022 - vreplgr2vr.d $vr4, $a2 lu32i.d $a0, -268678 + lu52i.d $a0, $a0, 1042 pcalau12i $a2, %pc_hi20(.LCPI214_2) fld.d $fs0, $a2, %pc_lo12(.LCPI214_2) pcalau12i $a2, %pc_hi20(.LCPI214_4) fld.d $fs1, $a2, %pc_lo12(.LCPI214_4) pcalau12i $a2, %pc_hi20(.LCPI214_3) fld.d $fs2, $a2, %pc_lo12(.LCPI214_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr5, $a0 + vreplgr2vr.d $vr4, $a0 + vldi $vr5, -854 movgr2fr.d $fs3, $zero pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s5, $zero - vst $vr3, $sp, 48 # 16-byte Folded Spill - vst $vr4, $sp, 32 # 16-byte Folded Spill - vst $vr5, $sp, 16 # 16-byte Folded Spill + vst $vr3, $sp, 32 # 16-byte Folded Spill + vst $vr4, $sp, 16 # 16-byte Folded Spill b .LBB214_13 .p2align 4, , 16 .LBB214_12: # %_Z9check_sumIdEvT_.exit.us31 @@ -95915,11 +95907,11 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fld.d $fa1, $fp, %pc_lo12(init_value) vextrins.d $vr0, $vr1, 16 vfadd.d $vr0, $vr0, $vr3 - vfmadd.d $vr0, $vr0, $vr5, $vr4 + vfmadd.d $vr0, $vr0, $vr4, $vr5 vfadd.d $vr0, $vr0, $vr3 - vfmadd.d $vr0, $vr0, $vr5, $vr4 + vfmadd.d $vr0, $vr0, $vr4, $vr5 vfadd.d $vr0, $vr0, $vr3 - vfmadd.d $vr0, $vr0, $vr5, $vr4 + vfmadd.d $vr0, $vr0, $vr4, $vr5 vreplvei.d $vr1, $vr0, 0 fadd.d $fa1, $fa1, $fs3 vreplvei.d $vr0, $vr0, 1 @@ -95937,9 +95929,9 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - vld $vr5, $sp, 16 # 16-byte Folded Reload - vld $vr4, $sp, 32 # 16-byte Folded Reload - vld $vr3, $sp, 48 # 16-byte Folded Reload + vldi $vr5, -854 + vld $vr4, $sp, 16 # 16-byte Folded Reload + vld $vr3, $sp, 32 # 16-byte Folded Reload ld.w $a1, $s4, %pc_lo12(iterations) b .LBB214_12 .LBB214_15: # %.preheader17.preheader @@ -96026,24 +96018,24 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol st.d $a2, $a1, 8 addi.d $a0, $a0, 1 st.w $a0, $s2, %pc_lo12(current_test) - fld.d $fs5, $sp, 72 # 8-byte Folded Reload - fld.d $fs4, $sp, 80 # 8-byte Folded Reload - fld.d $fs3, $sp, 88 # 8-byte Folded Reload - fld.d $fs2, $sp, 96 # 8-byte Folded Reload - fld.d $fs1, $sp, 104 # 8-byte Folded Reload - fld.d $fs0, $sp, 112 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s5, $sp, 144 # 8-byte Folded Reload - ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s3, $sp, 160 # 8-byte Folded Reload - ld.d $s2, $sp, 168 # 8-byte Folded Reload - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $s0, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $ra, $sp, 200 # 8-byte Folded 
Reload - addi.d $sp, $sp, 208 + fld.d $fs5, $sp, 56 # 8-byte Folded Reload + fld.d $fs4, $sp, 64 # 8-byte Folded Reload + fld.d $fs3, $sp, 72 # 8-byte Folded Reload + fld.d $fs2, $sp, 80 # 8-byte Folded Reload + fld.d $fs1, $sp, 88 # 8-byte Folded Reload + fld.d $fs0, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 ret .LBB214_24: ld.w $a1, $s1, %pc_lo12(allocated_results) @@ -111389,25 +111381,25 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_startproc # %bb.0: - addi.d $sp, $sp, -224 - .cfi_def_cfa_offset 224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill - fst.d $fs2, $sp, 112 # 8-byte Folded Spill - fst.d $fs3, $sp, 104 # 8-byte Folded Spill - fst.d $fs4, $sp, 96 # 8-byte Folded Spill - fst.d $fs5, $sp, 88 # 8-byte Folded Spill + addi.d $sp, $sp, -208 + .cfi_def_cfa_offset 208 + st.d $ra, $sp, 200 # 8-byte Folded Spill + st.d $fp, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 184 # 8-byte Folded Spill + st.d $s1, $sp, 176 # 8-byte Folded Spill + st.d $s2, $sp, 168 # 8-byte Folded Spill + st.d $s3, $sp, 160 # 8-byte Folded Spill + st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s5, $sp, 144 # 8-byte Folded Spill + st.d $s6, $sp, 136 # 8-byte Folded Spill + st.d $s7, $sp, 128 # 8-byte Folded Spill + st.d $s8, $sp, 120 # 8-byte Folded Spill + fst.d $fs0, $sp, 112 # 8-byte Folded Spill + fst.d $fs1, $sp, 104 # 8-byte Folded Spill + fst.d $fs2, $sp, 96 # 8-byte Folded Spill + fst.d $fs3, $sp, 88 # 8-byte Folded Spill + fst.d $fs4, $sp, 80 # 8-byte Folded Spill + fst.d $fs5, $sp, 72 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -111560,26 +111552,22 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact lu32i.d $a2, -516992 lu52i.d $a2, $a2, 1036 vreplgr2vr.d $vr4, $a2 - ori $a2, $zero, 0 - lu32i.d $a2, -393216 - lu52i.d $a2, $a2, -1022 - vreplgr2vr.d $vr5, $a2 lu32i.d $a0, -268678 + lu52i.d $a0, $a0, 1042 pcalau12i $a2, %pc_hi20(.LCPI249_2) fld.d $fs0, $a2, %pc_lo12(.LCPI249_2) pcalau12i $a2, %pc_hi20(.LCPI249_4) fld.d $fs1, $a2, %pc_lo12(.LCPI249_4) pcalau12i $a2, %pc_hi20(.LCPI249_3) fld.d $fs2, $a2, %pc_lo12(.LCPI249_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr6, $a0 + vreplgr2vr.d $vr5, $a0 + vldi $vr6, -854 movgr2fr.d $fs3, $zero pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $fp, $zero - vst $vr4, $sp, 64 # 16-byte Folded Spill - vst $vr5, $sp, 48 # 16-byte 
Folded Spill - vst $vr6, $sp, 32 # 16-byte Folded Spill + vst $vr4, $sp, 48 # 16-byte Folded Spill + vst $vr5, $sp, 32 # 16-byte Folded Spill b .LBB249_14 .p2align 4, , 16 .LBB249_13: # %_Z9check_sumIdEvT_.exit.us34 @@ -111591,11 +111579,11 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fld.d $fa1, $s1, 0 vextrins.d $vr1, $vr0, 16 vfadd.d $vr1, $vr1, $vr4 - vfmadd.d $vr1, $vr1, $vr6, $vr5 + vfmadd.d $vr1, $vr1, $vr5, $vr6 vfadd.d $vr1, $vr1, $vr4 - vfmadd.d $vr1, $vr1, $vr6, $vr5 + vfmadd.d $vr1, $vr1, $vr5, $vr6 vfadd.d $vr1, $vr1, $vr4 - vfmadd.d $vr1, $vr1, $vr6, $vr5 + vfmadd.d $vr1, $vr1, $vr5, $vr6 vreplvei.d $vr2, $vr1, 0 fadd.d $fa2, $fa2, $fs3 vreplvei.d $vr1, $vr1, 1 @@ -111613,9 +111601,9 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - vld $vr6, $sp, 32 # 16-byte Folded Reload - vld $vr5, $sp, 48 # 16-byte Folded Reload - vld $vr4, $sp, 64 # 16-byte Folded Reload + vldi $vr6, -854 + vld $vr5, $sp, 32 # 16-byte Folded Reload + vld $vr4, $sp, 48 # 16-byte Folded Reload fld.d $fa0, $s5, %pc_lo12(init_value) ld.w $a1, $s4, %pc_lo12(iterations) b .LBB249_13 @@ -111703,24 +111691,24 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact st.d $a2, $a1, 8 addi.d $a0, $a0, 1 st.w $a0, $s2, %pc_lo12(current_test) - fld.d $fs5, $sp, 88 # 8-byte Folded Reload - fld.d $fs4, $sp, 96 # 8-byte Folded Reload - fld.d $fs3, $sp, 104 # 8-byte Folded Reload - fld.d $fs2, $sp, 112 # 8-byte Folded Reload - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs5, $sp, 72 # 8-byte Folded Reload + fld.d $fs4, $sp, 80 # 8-byte Folded Reload + fld.d $fs3, $sp, 88 # 8-byte Folded Reload + fld.d $fs2, $sp, 96 # 8-byte Folded Reload + fld.d $fs1, $sp, 104 # 8-byte Folded Reload + fld.d $fs0, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 160 # 8-byte Folded Reload + ld.d $s2, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 176 # 8-byte Folded Reload + ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 192 # 8-byte Folded Reload + ld.d $ra, $sp, 200 # 8-byte Folded Reload + addi.d $sp, $sp, 208 ret .LBB249_25: ld.w $a1, $s1, %pc_lo12(allocated_results) @@ -127066,25 +127054,25 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_startproc # %bb.0: - addi.d $sp, $sp, -224 - .cfi_def_cfa_offset 224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte 
Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill - fst.d $fs2, $sp, 112 # 8-byte Folded Spill - fst.d $fs3, $sp, 104 # 8-byte Folded Spill - fst.d $fs4, $sp, 96 # 8-byte Folded Spill - fst.d $fs5, $sp, 88 # 8-byte Folded Spill + addi.d $sp, $sp, -208 + .cfi_def_cfa_offset 208 + st.d $ra, $sp, 200 # 8-byte Folded Spill + st.d $fp, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 184 # 8-byte Folded Spill + st.d $s1, $sp, 176 # 8-byte Folded Spill + st.d $s2, $sp, 168 # 8-byte Folded Spill + st.d $s3, $sp, 160 # 8-byte Folded Spill + st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s5, $sp, 144 # 8-byte Folded Spill + st.d $s6, $sp, 136 # 8-byte Folded Spill + st.d $s7, $sp, 128 # 8-byte Folded Spill + st.d $s8, $sp, 120 # 8-byte Folded Spill + fst.d $fs0, $sp, 112 # 8-byte Folded Spill + fst.d $fs1, $sp, 104 # 8-byte Folded Spill + fst.d $fs2, $sp, 96 # 8-byte Folded Spill + fst.d $fs3, $sp, 88 # 8-byte Folded Spill + fst.d $fs4, $sp, 80 # 8-byte Folded Spill + fst.d $fs5, $sp, 72 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -127237,26 +127225,22 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ lu32i.d $a2, -516992 lu52i.d $a2, $a2, 1036 vreplgr2vr.d $vr4, $a2 - ori $a2, $zero, 0 - lu32i.d $a2, -393216 - lu52i.d $a2, $a2, -1022 - vreplgr2vr.d $vr5, $a2 lu32i.d $a0, -268678 + lu52i.d $a0, $a0, 1042 pcalau12i $a2, %pc_hi20(.LCPI284_2) fld.d $fs0, $a2, %pc_lo12(.LCPI284_2) pcalau12i $a2, %pc_hi20(.LCPI284_4) fld.d $fs1, $a2, %pc_lo12(.LCPI284_4) pcalau12i $a2, %pc_hi20(.LCPI284_3) fld.d $fs2, $a2, %pc_lo12(.LCPI284_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr6, $a0 + vreplgr2vr.d $vr5, $a0 + vldi $vr6, -854 movgr2fr.d $fs3, $zero pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $fp, $zero - vst $vr4, $sp, 64 # 16-byte Folded Spill - vst $vr5, $sp, 48 # 16-byte Folded Spill - vst $vr6, $sp, 32 # 16-byte Folded Spill + vst $vr4, $sp, 48 # 16-byte Folded Spill + vst $vr5, $sp, 32 # 16-byte Folded Spill b .LBB284_14 .p2align 4, , 16 .LBB284_13: # %_Z9check_sumIdEvT_.exit.us34 @@ -127268,11 +127252,11 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fld.d $fa1, $s1, 0 vextrins.d $vr1, $vr0, 16 vfadd.d $vr1, $vr1, $vr4 - vfmadd.d $vr1, $vr1, $vr6, $vr5 + vfmadd.d $vr1, $vr1, $vr5, $vr6 vfadd.d $vr1, $vr1, $vr4 - vfmadd.d $vr1, $vr1, $vr6, $vr5 + vfmadd.d $vr1, $vr1, $vr5, $vr6 vfadd.d $vr1, $vr1, $vr4 - vfmadd.d $vr1, $vr1, $vr6, $vr5 + vfmadd.d $vr1, $vr1, $vr5, $vr6 vreplvei.d $vr2, $vr1, 0 fadd.d $fa2, $fa2, $fs3 vreplvei.d $vr1, $vr1, 1 @@ -127290,9 +127274,9 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - vld $vr6, $sp, 32 # 16-byte Folded Reload - vld $vr5, $sp, 48 # 16-byte Folded Reload - vld $vr4, $sp, 64 # 16-byte Folded Reload + vldi $vr6, -854 + vld $vr5, $sp, 32 # 16-byte Folded Reload + vld $vr4, $sp, 48 # 16-byte Folded Reload fld.d $fa0, $s5, %pc_lo12(init_value) ld.w $a1, $s4, %pc_lo12(iterations) b .LBB284_13 @@ -127380,24 +127364,24 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ st.d $a2, $a1, 8 addi.d $a0, $a0, 1 st.w $a0, $s2, 
%pc_lo12(current_test) - fld.d $fs5, $sp, 88 # 8-byte Folded Reload - fld.d $fs4, $sp, 96 # 8-byte Folded Reload - fld.d $fs3, $sp, 104 # 8-byte Folded Reload - fld.d $fs2, $sp, 112 # 8-byte Folded Reload - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs5, $sp, 72 # 8-byte Folded Reload + fld.d $fs4, $sp, 80 # 8-byte Folded Reload + fld.d $fs3, $sp, 88 # 8-byte Folded Reload + fld.d $fs2, $sp, 96 # 8-byte Folded Reload + fld.d $fs1, $sp, 104 # 8-byte Folded Reload + fld.d $fs0, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 160 # 8-byte Folded Reload + ld.d $s2, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 176 # 8-byte Folded Reload + ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 192 # 8-byte Folded Reload + ld.d $ra, $sp, 200 # 8-byte Folded Reload + addi.d $sp, $sp, 208 ret .LBB284_25: ld.w $a1, $s1, %pc_lo12(allocated_results) diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s index 4132a43f..5a174275 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s @@ -126,40 +126,37 @@ main: # @main .LBB2_4: # %.preheader.preheader pcalau12i $a0, %pc_hi20(.LCPI2_1) vld $vr0, $a0, %pc_lo12(.LCPI2_1) - vrepli.b $vr10, 0 - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr1, $a0 + vrepli.b $vr11, 0 pcalau12i $a0, %pc_hi20(.LCPI2_2) - vld $vr2, $a0, %pc_lo12(.LCPI2_2) + vld $vr1, $a0, %pc_lo12(.LCPI2_2) pcalau12i $a0, %pc_hi20(.LCPI2_3) - fld.d $fa3, $a0, %pc_lo12(.LCPI2_3) - lu52i.d $a0, $zero, -1025 - vreplgr2vr.d $vr4, $a0 - lu52i.d $a0, $zero, 1024 - vreplgr2vr.d $vr5, $a0 - vori.b $vr11, $vr10, 0 - vori.b $vr12, $vr10, 0 - vori.b $vr9, $vr10, 0 - vori.b $vr8, $vr10, 0 + fld.d $fa2, $a0, %pc_lo12(.LCPI2_3) + vldi $vr3, -912 + vldi $vr4, -784 + vldi $vr5, -1024 + vori.b $vr10, $vr11, 0 + vori.b $vr12, $vr11, 0 + vori.b $vr9, $vr11, 0 + vori.b $vr8, $vr11, 0 .p2align 4, , 16 .LBB2_5: # %.preheader # =>This Inner Loop Header: Depth=1 - vfadd.d $vr6, $vr0, $vr1 + vfadd.d $vr6, $vr0, $vr3 vfmul.d $vr6, $vr0, $vr6 vfrecip.d $vr6, $vr6 vfadd.d $vr12, $vr12, $vr6 vfrecip.d $vr6, $vr0 - vfadd.d $vr10, $vr6, $vr10 + vfadd.d $vr11, $vr6, $vr11 vfmul.d $vr6, $vr0, $vr0 vfrecip.d $vr6, $vr6 - vfadd.d $vr11, $vr6, $vr11 - vfdiv.d $vr6, $vr2, $vr0 + vfadd.d $vr10, $vr6, $vr10 + vfdiv.d $vr6, $vr1, $vr0 vfadd.d $vr9, $vr6, $vr9 vfmadd.d $vr6, $vr0, $vr5, $vr4 vfadd.d $vr0, $vr0, $vr5 vreplvei.d $vr7, $vr0, 0 - fcmp.cle.d $fcc0, $fa7, $fa3 - vfdiv.d $vr6, $vr2, $vr6 + fcmp.cle.d $fcc0, $fa7, $fa2 + vfdiv.d $vr6, $vr1, $vr6 vfadd.d $vr8, $vr8, $vr6 bcnez $fcc0, 
.LBB2_5 # %bb.6: @@ -171,8 +168,8 @@ main: # @main move $a0, $fp vst $vr8, $sp, 80 # 16-byte Folded Spill vst $vr9, $sp, 64 # 16-byte Folded Spill - vst $vr10, $sp, 32 # 16-byte Folded Spill - vst $vr11, $sp, 48 # 16-byte Folded Spill + vst $vr10, $sp, 48 # 16-byte Folded Spill + vst $vr11, $sp, 32 # 16-byte Folded Spill vst $vr12, $sp, 16 # 16-byte Folded Spill pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s index 71425cac..9261f1b2 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s @@ -284,8 +284,7 @@ main: # @main slli.d $a0, $a0, 3 slli.d $a2, $s3, 3 bstrins.d $a2, $zero, 5, 0 - lu52i.d $a3, $zero, 1023 - xvreplgr2vr.d $xr0, $a3 + xvldi $xr0, -912 .p2align 4, , 16 .LBB4_6: # %vector.body # =>This Inner Loop Header: Depth=1 diff --git a/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s b/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s index 2524897c..e9fd6d36 100644 --- a/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s +++ b/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s @@ -72,28 +72,28 @@ Proc0: # @Proc0 jirl $ra, $ra, 0 lu12i.w $a0, 24414 ori $a0, $a0, 256 - lu32i.d $s1, -262144 - lu52i.d $a1, $s1, 1025 - vreplgr2vr.d $vr0, $a1 - pcalau12i $a2, %pc_hi20(Array1Glob) - addi.d $a2, $a2, %pc_lo12(Array1Glob) - lu52i.d $a3, $zero, 1026 - ori $a4, $zero, 3320 + pcalau12i $a1, %pc_hi20(Array1Glob) + addi.d $a1, $a1, %pc_lo12(Array1Glob) + vldi $vr0, -996 + lu52i.d $a2, $zero, 1026 + ori $a3, $zero, 3320 vldi $vr1, -912 + lu32i.d $s1, -262144 + lu52i.d $a4, $s1, 1025 lu52i.d $a5, $s2, 1025 .p2align 4, , 16 .LBB1_1: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 - vst $vr0, $a2, 64 - st.d $a3, $a2, 304 - fldx.d $fa2, $fp, $a4 + vst $vr0, $a1, 64 + st.d $a2, $a1, 304 + fldx.d $fa2, $fp, $a3 ld.d $a6, $s0, %pc_lo12(PtrGlb) - stptr.d $a3, $fp, 3328 - stptr.d $a3, $fp, 3336 + stptr.d $a2, $fp, 3328 + stptr.d $a2, $fp, 3336 fadd.d $fa2, $fa2, $fa1 ld.d $a7, $a6, 0 - fstx.d $fa2, $fp, $a4 - stptr.d $a1, $fp, 11488 + fstx.d $fa2, $fp, $a3 + stptr.d $a4, $fp, 11488 st.d $a5, $a6, 16 st.d $a5, $a7, 16 addi.w $a0, $a0, -1 diff --git a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s index 4e887bae..3b031951 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s +++ b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s @@ -531,12 +531,11 @@ _ZNK16ComplexBenchmark9oop_styleEv: # @_ZNK16ComplexBenchmark9oop_styleEv addi.d $a1, $a1, %pc_lo12(Y) pcalau12i $a2, %pc_hi20(X) addi.d $a2, $a2, %pc_lo12(X) + pcalau12i $a3, %pc_hi20(.LCPI13_0) + vld $vr0, $a3, %pc_lo12(.LCPI13_0) lu12i.w $a3, 3 - pcalau12i $a4, %pc_hi20(.LCPI13_0) - vld $vr0, $a4, %pc_lo12(.LCPI13_0) ori $a3, $a3, 3712 - lu52i.d $a4, $zero, 1022 - vreplgr2vr.d $vr1, $a4 + vldi $vr1, -928 .p2align 4, , 16 .LBB13_1: # =>This Inner Loop Header: Depth=1 add.d $a4, $a2, $a0 diff --git a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s 
b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s index 1ee8c6ed..37bc719b 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s +++ b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s @@ -367,10 +367,7 @@ main: # @main slli.d $a2, $a2, 6 add.d $a2, $a0, $a2 addi.d $a5, $a0, 32 - ori $a6, $zero, 0 - lu32i.d $a6, -524288 - lu52i.d $a6, $a6, 1024 - xvreplgr2vr.d $xr0, $a6 + xvldi $xr0, -1016 move $a6, $a4 .p2align 4, , 16 .LBB2_5: # %vector.body @@ -415,10 +412,7 @@ main: # @main slli.d $a3, $a3, 6 add.d $a3, $a6, $a3 addi.d $a6, $a6, 32 - ori $a7, $zero, 0 - lu32i.d $a7, -524288 - lu52i.d $a7, $a7, 1024 - xvreplgr2vr.d $xr0, $a7 + xvldi $xr0, -1016 move $a7, $a5 .p2align 4, , 16 .LBB2_13: # %vector.body36 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s index ed169e8d..7574c6ac 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s @@ -183,8 +183,7 @@ main: # @main move $a0, $zero ld.d $a1, $sp, 8 # 8-byte Folded Reload addi.d $a1, $a1, 256 - lu12i.w $a2, 260096 - xvreplgr2vr.w $xr0, $a2 + xvldi $xr0, -1424 ori $a2, $zero, 64 .p2align 4, , 16 .LBB0_5: # %.preheader23.us.us.i diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/whetstone.dir/whetstone.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/whetstone.dir/whetstone.s index ca092b9e..7334c3da 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/whetstone.dir/whetstone.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/whetstone.dir/whetstone.s @@ -81,7 +81,7 @@ main: # @main .LBB0_9: # %.preheader st.d $a1, $sp, 192 # 8-byte Folded Spill alsl.d $a1, $a0, $a0, 1 - slli.d $s3, $a1, 2 + slli.d $s2, $a1, 2 ori $a1, $zero, 14 mul.d $s1, $a0, $a1 ori $a1, $zero, 345 @@ -98,9 +98,9 @@ main: # @main ori $a1, $zero, 93 mul.d $a5, $a0, $a1 ori $a2, $zero, 1 - slt $a0, $a2, $s3 + slt $a0, $a2, $s2 masknez $a1, $a2, $a0 - maskeqz $a0, $s3, $a0 + maskeqz $a0, $s2, $a0 or $a0, $a0, $a1 st.d $a0, $sp, 136 # 8-byte Folded Spill slt $a0, $a2, $s1 @@ -168,12 +168,13 @@ main: # @main lu52i.d $a0, $a0, 1025 st.d $a0, $sp, 80 # 8-byte Folded Spill st.d $s1, $sp, 184 # 8-byte Folded Spill - pcalau12i $s6, %pc_hi20(J) + pcalau12i $s6, %pc_hi20(T2) + pcalau12i $s3, %pc_hi20(J) b .LBB0_11 .p2align 4, , 16 .LBB0_10: # %._crit_edge267 # in Loop: Header=BB0_11 Depth=1 - ld.w $a2, $s6, %pc_lo12(J) + ld.w $a2, $s3, %pc_lo12(J) ld.w $a3, $s1, %pc_lo12(K) movfr2gr.d $a4, $fa0 pcalau12i $a0, %pc_hi20(.L.str.3) @@ -212,9 +213,8 @@ main: # @main ld.d $a0, $sp, 88 # 8-byte Folded Reload st.d $a1, $sp, 200 # 8-byte Folded Spill st.d $a0, $a1, %pc_lo12(T1) - pcalau12i $s0, %pc_hi20(T2) lu52i.d $a0, $zero, 1024 - st.d $a0, $s0, %pc_lo12(T2) + st.d $a0, $s6, %pc_lo12(T2) pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a0, $a0, %pc_lo12(.L.str.3) move $a1, $zero @@ -256,30 +256,30 @@ main: # @main # %bb.13: # %.lr.ph211 # in Loop: Header=BB0_11 Depth=1 pcalau12i $a0, %pc_hi20(E1) - addi.d $s2, $a0, %pc_lo12(E1) - fst.d $fa1, $s2, 8 - fst.d $fa0, $s2, 16 - fst.d $fa2, $s2, 24 - fst.d $fa4, $s2, 32 + addi.d $s0, $a0, %pc_lo12(E1) + fst.d $fa1, $s0, 8 + fst.d $fa0, $s0, 16 + fst.d $fa2, $s0, 24 + fst.d $fa4, $s0, 32 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 movfr2gr.d $a6, $fa2 movfr2gr.d $a7, $fa4 
pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a0, $a0, %pc_lo12(.L.str.3) - move $a1, $s3 + move $a1, $s2 move $a2, $s1 - move $a3, $s3 + move $a3, $s2 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fld.d $fa2, $s2, 32 - fld.d $fa3, $s2, 24 - fld.d $fa4, $s2, 16 - fld.d $fa5, $s2, 8 + fld.d $fa2, $s0, 32 + fld.d $fa3, $s0, 24 + fld.d $fa4, $s0, 16 + fld.d $fa5, $s0, 8 fld.d $fa0, $s5, %pc_lo12(T) - fld.d $fa1, $s0, %pc_lo12(T2) + fld.d $fa1, $s6, %pc_lo12(T2) ori $a0, $zero, 6 - st.w $a0, $s6, %pc_lo12(J) + st.w $a0, $s3, %pc_lo12(J) ld.d $a0, $sp, 128 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_14: # %PA.exit @@ -385,10 +385,10 @@ main: # @main bnez $a0, .LBB0_14 # %bb.15: # %iter.check # in Loop: Header=BB0_11 Depth=1 - fst.d $fa5, $s2, 8 - fst.d $fa4, $s2, 16 - fst.d $fa3, $s2, 24 - fst.d $fa2, $s2, 32 + fst.d $fa5, $s0, 8 + fst.d $fa4, $s0, 16 + fst.d $fa3, $s0, 24 + fst.d $fa2, $s0, 32 movfr2gr.d $a4, $fa5 movfr2gr.d $a5, $fa4 movfr2gr.d $a6, $fa3 @@ -396,8 +396,8 @@ main: # @main pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a0, $a0, %pc_lo12(.L.str.3) move $a1, $s1 - move $a2, $s3 - move $a3, $s3 + move $a2, $s2 + move $a3, $s2 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ori $a0, $zero, 1 @@ -484,7 +484,7 @@ main: # @main bnez $a0, .LBB0_26 .LBB0_27: # %.lr.ph224.preheader # in Loop: Header=BB0_11 Depth=1 - st.w $a2, $s6, %pc_lo12(J) + st.w $a2, $s3, %pc_lo12(J) pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $s4, $a0, %pc_lo12(.L.str.3) move $a0, $s4 @@ -497,15 +497,14 @@ main: # @main move $a7, $s8 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.d $a4, $sp, 80 # 8-byte Folded Reload - vreplgr2vr.d $vr0, $a4 - vst $vr0, $s2, 8 + vldi $vr0, -1000 + vst $vr0, $s0, 8 ori $a0, $zero, 1 - st.w $a0, $s6, %pc_lo12(J) + st.w $a0, $s3, %pc_lo12(J) pcalau12i $s1, %pc_hi20(K) - ld.d $a7, $s2, 32 - ld.d $a6, $s2, 24 - ld.d $a5, $s2, 16 + ld.d $a7, $s0, 32 + ld.d $a6, $s0, 24 + ld.d $a5, $s0, 16 ori $a0, $zero, 2 st.w $a0, $s1, %pc_lo12(K) pcalau12i $s7, %pc_hi20(L) @@ -515,10 +514,11 @@ main: # @main ori $a3, $zero, 2 move $a0, $s4 ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a4, $sp, 80 # 8-byte Folded Reload pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 fld.d $fs1, $s5, %pc_lo12(T) - fld.d $fs2, $s0, %pc_lo12(T2) + fld.d $fs2, $s6, %pc_lo12(T2) vldi $vr0, -928 ld.d $s4, $sp, 120 # 8-byte Folded Reload vldi $vr1, -928 @@ -589,7 +589,7 @@ main: # @main bnez $s4, .LBB0_28 # %bb.29: # %.lr.ph257.preheader # in Loop: Header=BB0_11 Depth=1 - ld.w $a2, $s6, %pc_lo12(J) + ld.w $a2, $s3, %pc_lo12(J) ld.w $a3, $s1, %pc_lo12(K) vld $vr1, $sp, 224 # 16-byte Folded Reload movfr2gr.d $a4, $fa1 @@ -607,8 +607,8 @@ main: # @main vldi $vr2, -912 fadd.d $fa2, $fa1, $fa2 fmul.d $fa0, $fa0, $fa2 - fld.d $fa2, $s0, %pc_lo12(T2) - ld.w $a2, $s6, %pc_lo12(J) + fld.d $fa2, $s6, %pc_lo12(T2) + ld.w $a2, $s3, %pc_lo12(J) ld.w $a3, $s1, %pc_lo12(K) fadd.d $fa0, $fa1, $fa0 fdiv.d $fa0, $fa0, $fa2 @@ -621,7 +621,7 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ori $a0, $zero, 1 - st.w $a0, $s6, %pc_lo12(J) + st.w $a0, $s3, %pc_lo12(J) ori $a0, $zero, 2 st.w $a0, $s1, %pc_lo12(K) ori $a0, $zero, 3 @@ -640,10 +640,10 @@ main: # @main bnez $a0, .LBB0_30 # %bb.31: # %.lr.ph266 # in Loop: Header=BB0_11 Depth=1 - ld.d $a7, $s2, 32 - fst.d $fa0, $s2, 8 - fst.d $fa1, $s2, 16 - fst.d $fa0, $s2, 24 + ld.d $a7, $s0, 32 + fst.d $fa0, $s0, 8 + fst.d $fa1, $s0, 16 + fst.d $fa0, $s0, 24 movfr2gr.d $a5, $fa1 movfr2gr.d $a4, $fa0 pcalau12i $a0, %pc_hi20(.L.str.3) @@ -656,7 +656,7 @@ main: # @main pcaddu18i $ra, 
%call36(printf) jirl $ra, $ra, 0 ori $a0, $zero, 2 - st.w $a0, $s6, %pc_lo12(J) + st.w $a0, $s3, %pc_lo12(J) ori $a0, $zero, 3 st.w $a0, $s1, %pc_lo12(K) ori $a2, $zero, 2 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s index 316a355e..9fb7225b 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s @@ -454,10 +454,7 @@ main: # @main lu32i.d $a4, 209715 lu52i.d $a4, $a4, 1023 xvreplgr2vr.d $xr3, $a4 - ori $a4, $zero, 0 - lu32i.d $a4, -524288 - lu52i.d $a4, $a4, 1023 - xvreplgr2vr.d $xr4, $a4 + xvldi $xr4, -904 xvrepli.b $xr5, 0 ori $a4, $zero, 60 ori $a5, $zero, 80 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s index 4603108f..7b5b7ff1 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s @@ -430,22 +430,19 @@ main: # @main addi.w $t4, $zero, -8 ori $a1, $a4, 1416 st.d $a1, $sp, 56 # 8-byte Folded Spill - vldi $vr1, -904 + vldi $vr2, -904 ori $t6, $zero, 1000 lu12i.w $a1, -4 ori $a2, $a1, 384 ori $a3, $a3, 192 - ori $a1, $zero, 0 - lu32i.d $a1, -524288 - lu52i.d $a1, $a1, 1023 - xvreplgr2vr.d $xr2, $a1 + xvldi $xr0, -904 lu12i.w $a1, 7 ori $a4, $a1, 3328 lu12i.w $a1, 209715 ori $a1, $a1, 819 lu32i.d $a1, 209715 lu52i.d $a1, $a1, 1023 - xvreplgr2vr.d $xr0, $a1 + xvreplgr2vr.d $xr1, $a1 pcalau12i $a5, %pc_hi20(.LCPI7_3) b .LBB7_24 .p2align 4, , 16 @@ -483,8 +480,8 @@ main: # @main # => This Inner Loop Header: Depth=2 xvld $xr3, $t1, -32 xvld $xr4, $t1, 0 - xvfmul.d $xr3, $xr3, $xr0 - xvfmul.d $xr4, $xr4, $xr0 + xvfmul.d $xr3, $xr3, $xr1 + xvfmul.d $xr4, $xr4, $xr1 xvst $xr3, $t1, -32 xvst $xr4, $t1, 0 addi.d $a6, $a6, -8 @@ -592,12 +589,12 @@ main: # @main fldx.d $fa7, $t2, $a3 fld.d $ft0, $t2, 0 fldx.d $ft1, $t2, $a0 - xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr5, $xr5, $xr0 xvinsve0.d $xr6, $xr7, 1 xvinsve0.d $xr6, $xr8, 2 xvinsve0.d $xr6, $xr9, 3 xvld $xr7, $fp, 0 - xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr6, $xr6, $xr0 xvfmul.d $xr6, $xr6, $xr4 xvfmadd.d $xr5, $xr5, $xr3, $xr6 xvfadd.d $xr5, $xr7, $xr5 @@ -626,10 +623,10 @@ main: # @main fld.d $fa4, $fp, 0 fld.d $fa5, $a1, 0 fld.d $fa6, $t7, 0 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa2 fldx.d $fa7, $t8, $t1 fmul.d $fa3, $fa3, $fa5 - fmul.d $fa4, $fa4, $fa1 + fmul.d $fa4, $fa4, $fa2 fmadd.d $fa3, $fa4, $fa6, $fa3 fadd.d $fa3, $fa7, $fa3 fstx.d $fa3, $t8, $t1 @@ -652,12 +649,8 @@ main: # @main st.d $a1, $sp, 24 # 8-byte Folded Spill ori $t2, $zero, 1 addi.w $t3, $zero, -8 - vldi $vr1, -904 + vldi $vr2, -904 ori $t5, $zero, 1000 - ori $a1, $zero, 0 - lu32i.d $a1, -524288 - lu52i.d $a1, $a1, 1023 - xvreplgr2vr.d $xr2, $a1 b .LBB7_42 .p2align 4, , 16 .LBB7_41: # in Loop: Header=BB7_42 Depth=1 @@ -694,8 +687,8 @@ main: # @main # => This Inner Loop Header: Depth=2 xvld $xr3, $t1, -32 xvld $xr4, $t1, 0 - xvfmul.d $xr3, $xr3, $xr0 - xvfmul.d $xr4, $xr4, $xr0 + xvfmul.d $xr3, $xr3, $xr1 + xvfmul.d $xr4, $xr4, $xr1 xvst $xr3, $t1, -32 xvst $xr4, $t1, 0 addi.d $a6, $a6, -8 @@ -799,7 +792,7 @@ main: # @main xvinsve0.d $xr5, $xr6, 
1 xvinsve0.d $xr5, $xr7, 2 xvinsve0.d $xr5, $xr8, 3 - xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr5, $xr5, $xr0 fldx.d $fa6, $s2, $a2 fldx.d $fa7, $s2, $a3 fld.d $ft0, $s2, 0 @@ -809,7 +802,7 @@ main: # @main xvinsve0.d $xr6, $xr8, 2 xvinsve0.d $xr6, $xr9, 3 xvld $xr7, $t7, 0 - xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr6, $xr6, $xr0 xvfmul.d $xr6, $xr6, $xr4 xvfadd.d $xr5, $xr5, $xr6 xvfadd.d $xr5, $xr7, $xr5 @@ -836,12 +829,12 @@ main: # @main # => This Inner Loop Header: Depth=3 fld.d $fa3, $s2, 0 fld.d $fa4, $t6, 0 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa2 fld.d $fa5, $t7, 0 fld.d $fa6, $a1, 0 fmul.d $fa3, $fa3, $fa4 fldx.d $fa4, $fp, $t1 - fmul.d $fa5, $fa5, $fa1 + fmul.d $fa5, $fa5, $fa2 fmul.d $fa5, $fa5, $fa6 fadd.d $fa3, $fa3, $fa5 fadd.d $fa3, $fa4, $fa3 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s index 8b6257fa..f75e5726 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s @@ -317,10 +317,7 @@ main: # @main ori $a6, $zero, 1200 ori $a7, $zero, 1000 ori $t0, $s3, 2688 - ori $t1, $zero, 0 - lu32i.d $t1, -524288 - lu52i.d $t1, $t1, 1023 - xvreplgr2vr.d $xr1, $t1 + xvldi $xr1, -904 ori $t1, $zero, 999 move $t2, $s0 b .LBB7_18 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/atax/CMakeFiles/atax.dir/atax.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/atax/CMakeFiles/atax.dir/atax.s index 36eeffd7..98355e1c 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/atax/CMakeFiles/atax.dir/atax.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/atax/CMakeFiles/atax.dir/atax.s @@ -195,8 +195,7 @@ main: # @main lu32i.d $a1, 26624 lu52i.d $a1, $a1, 1034 xvreplgr2vr.d $xr1, $a1 - lu52i.d $a1, $zero, 1023 - xvreplgr2vr.d $xr2, $a1 + xvldi $xr2, -912 .p2align 4, , 16 .LBB7_9: # %vector.body # =>This Inner Loop Header: Depth=1 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s index b60b53db..8aa2f3bd 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s @@ -185,7 +185,7 @@ main: # @main lu32i.d $t5, -49152 lu52i.d $t5, $t5, 1033 xvreplgr2vr.d $xr3, $t5 - xvreplgr2vr.d $xr4, $t1 + xvldi $xr4, -912 ori $t5, $zero, 1999 move $t6, $fp move $t7, $s0 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s index e2d518ce..8155a32a 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s @@ -203,13 +203,10 @@ main: # @main lu32i.d $t0, -49152 lu52i.d $t0, $t0, 1032 xvreplgr2vr.d $xr6, $t0 - ori $t0, $zero, 0 - lu32i.d $t0, -458752 - lu52i.d $t0, $t0, 1029 
- xvreplgr2vr.d $xr7, $t0 - lu32i.d $a7, 262144 - lu52i.d $a7, $a7, 1026 - xvreplgr2vr.d $xr8, $a7 + lu32i.d $a7, -458752 + lu52i.d $a7, $a7, 1029 + xvreplgr2vr.d $xr7, $a7 + xvldi $xr8, -988 xvrepli.b $xr9, 0 move $a7, $t1 st.d $t2, $sp, 88 # 8-byte Folded Spill diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s index 10d23e2d..87f37d40 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s @@ -581,7 +581,7 @@ init_array: # @init_array lu32i.d $a0, -49152 lu52i.d $a0, $a0, 1033 xvreplgr2vr.d $xr3, $a0 - xvreplgr2vr.d $xr4, $s3 + xvldi $xr4, -912 move $fp, $s5 xvst $xr3, $sp, 48 # 32-byte Folded Spill xvst $xr4, $sp, 16 # 32-byte Folded Spill diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s index 4239c90d..cdb04de8 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s @@ -700,10 +700,8 @@ init_array: # @init_array lu32i.d $a6, -49152 lu52i.d $a6, $a6, 1033 xvreplgr2vr.d $xr2, $a6 - lu52i.d $a6, $zero, 1022 - xvreplgr2vr.d $xr3, $a6 - lu52i.d $a6, $zero, 1025 - xvreplgr2vr.d $xr4, $a6 + xvldi $xr3, -928 + xvldi $xr4, -1008 .p2align 4, , 16 .LBB8_4: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -778,7 +776,7 @@ init_array: # @init_array lu32i.d $a0, -49152 lu52i.d $a0, $a0, 1033 xvreplgr2vr.d $xr3, $a0 - xvreplgr2vr.d $xr4, $s2 + xvldi $xr4, -912 move $fp, $s6 xvst $xr3, $sp, 48 # 32-byte Folded Spill xvst $xr4, $sp, 16 # 32-byte Folded Spill diff --git a/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s b/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s index a6dda1cc..f098da62 100644 --- a/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s +++ b/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s @@ -220,9 +220,7 @@ main: # @main ori $a3, $s6, 3648 ori $a2, $zero, 991 xvreplgr2vr.d $xr2, $a2 - lu12i.w $a2, 15 - ori $a2, $a2, 4095 - xvreplgr2vr.w $xr6, $a2 + xvldi $xr6, -2305 lu12i.w $a2, 292863 ori $a2, $a2, 3840 xvreplgr2vr.w $xr7, $a2 diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s b/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s index 4cb23dfa..b5bd746c 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s @@ -433,8 +433,7 @@ main: # @main lu32i.d $a3, 419430 lu52i.d $a3, $a3, -1026 xvreplgr2vr.d $xr0, $a3 - lu52i.d $a3, $zero, -1026 - xvreplgr2vr.d $xr5, $a3 + xvldi $xr5, -800 st.d $t8, $sp, 112 # 8-byte Folded Spill .p2align 4, , 16 .LBB7_24: # %vector.ph180 @@ -881,8 +880,7 @@ main: # @main vldi $vr1, -800 ori $t8, $zero, 1000 ori $t3, $zero, 999 - lu52i.d $a7, $zero, 1022 - xvreplgr2vr.d $xr2, $a7 + xvldi $xr2, -928 .p2align 4, , 16 .LBB7_59: # 
%vector.ph278 # =>This Loop Header: Depth=1 diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s b/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s index fb3a195a..4dd68545 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s @@ -170,8 +170,8 @@ main: # @main bnez $a0, .LBB7_44 # %bb.6: # %polybench_alloc_data.exit14 move $a0, $zero - ld.d $t0, $sp, 136 # 8-byte Folded Reload - sub.d $a2, $fp, $t0 + ld.d $a7, $sp, 136 # 8-byte Folded Reload + sub.d $a2, $fp, $a7 ori $a3, $zero, 120 ori $a4, $zero, 31 vldi $vr0, -988 @@ -182,16 +182,12 @@ main: # @main ori $s3, $a1, 512 pcalau12i $a6, %pc_hi20(.LCPI7_0) xvld $xr2, $a6, %pc_lo12(.LCPI7_0) + xvldi $xr3, -988 ori $a6, $zero, 0 - ori $a7, $zero, 0 - lu32i.d $a7, 262144 - lu52i.d $a7, $a7, 1026 - xvreplgr2vr.d $xr3, $a7 lu32i.d $a6, -131072 lu52i.d $a6, $a6, 1029 xvreplgr2vr.d $xr4, $a6 ori $a6, $zero, 120 - move $a7, $t0 move $t0, $fp b .LBB7_8 .p2align 4, , 16 @@ -308,10 +304,8 @@ main: # @main vldi $vr1, -960 ori $t8, $zero, 952 ori $s2, $zero, 119 - lu52i.d $a0, $zero, -1024 - xvreplgr2vr.d $xr2, $a0 - lu52i.d $a0, $zero, 1020 - xvreplgr2vr.d $xr3, $a0 + xvldi $xr2, -896 + xvldi $xr3, -960 ori $s5, $zero, 928 .p2align 4, , 16 .LBB7_16: # %.preheader117.i diff --git a/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s b/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s index a93e5dc1..870eb8dd 100644 --- a/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s +++ b/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s @@ -1078,27 +1078,27 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt main: # @main .cfi_startproc # %bb.0: - addi.d $sp, $sp, -544 - .cfi_def_cfa_offset 544 - st.d $ra, $sp, 536 # 8-byte Folded Spill - st.d $fp, $sp, 528 # 8-byte Folded Spill - st.d $s0, $sp, 520 # 8-byte Folded Spill - st.d $s1, $sp, 512 # 8-byte Folded Spill - st.d $s2, $sp, 504 # 8-byte Folded Spill - st.d $s3, $sp, 496 # 8-byte Folded Spill - st.d $s4, $sp, 488 # 8-byte Folded Spill - st.d $s5, $sp, 480 # 8-byte Folded Spill - st.d $s6, $sp, 472 # 8-byte Folded Spill - st.d $s7, $sp, 464 # 8-byte Folded Spill - st.d $s8, $sp, 456 # 8-byte Folded Spill - fst.d $fs0, $sp, 448 # 8-byte Folded Spill - fst.d $fs1, $sp, 440 # 8-byte Folded Spill - fst.d $fs2, $sp, 432 # 8-byte Folded Spill - fst.d $fs3, $sp, 424 # 8-byte Folded Spill - fst.d $fs4, $sp, 416 # 8-byte Folded Spill - fst.d $fs5, $sp, 408 # 8-byte Folded Spill - fst.d $fs6, $sp, 400 # 8-byte Folded Spill - fst.d $fs7, $sp, 392 # 8-byte Folded Spill + addi.d $sp, $sp, -496 + .cfi_def_cfa_offset 496 + st.d $ra, $sp, 488 # 8-byte Folded Spill + st.d $fp, $sp, 480 # 8-byte Folded Spill + st.d $s0, $sp, 472 # 8-byte Folded Spill + st.d $s1, $sp, 464 # 8-byte Folded Spill + st.d $s2, $sp, 456 # 8-byte Folded Spill + st.d $s3, $sp, 448 # 8-byte Folded Spill + st.d $s4, $sp, 440 # 8-byte Folded Spill + st.d $s5, $sp, 432 # 8-byte Folded Spill + st.d $s6, $sp, 424 # 8-byte Folded Spill + st.d $s7, $sp, 416 # 8-byte Folded Spill + st.d $s8, $sp, 408 # 8-byte Folded Spill + fst.d $fs0, $sp, 400 # 8-byte Folded Spill + fst.d $fs1, $sp, 392 # 8-byte Folded Spill + fst.d $fs2, $sp, 384 # 8-byte Folded Spill + fst.d $fs3, $sp, 376 # 8-byte Folded Spill + fst.d $fs4, $sp, 368 # 8-byte Folded 
Spill + fst.d $fs5, $sp, 360 # 8-byte Folded Spill + fst.d $fs6, $sp, 352 # 8-byte Folded Spill + fst.d $fs7, $sp, 344 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1119,7 +1119,7 @@ main: # @main .cfi_offset 62, -144 .cfi_offset 63, -152 ori $a2, $zero, 2 - ori $fp, $zero, 1 + ori $s0, $zero, 1 bne $a0, $a2, .LBB1_2 # %bb.1: ld.d $a0, $a1, 8 @@ -1130,78 +1130,75 @@ main: # @main addi.w $a1, $a0, 0 bstrpick.d $a1, $a1, 62, 61 add.w $a0, $a0, $a1 - srai.d $fp, $a0, 2 + srai.d $s0, $a0, 2 .LBB1_2: - lu12i.w $s0, 4608 - move $a0, $s0 + lu12i.w $s1, 4608 + move $a0, $s1 pcaddu18i $ra, %call36(_Znam) jirl $ra, $ra, 0 - move $s1, $a0 + move $fp, $a0 move $a1, $zero - move $a2, $s0 + move $a2, $s1 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - slli.d $a2, $fp, 2 + slli.d $a2, $s0, 2 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 lu12i.w $a2, 4602 vrepli.b $vr3, 0 - blez $fp, .LBB1_41 + blez $s0, .LBB1_41 # %bb.3: # %.split148.us.us.preheader move $a1, $zero move $a3, $zero - movgr2fr.w $fa0, $fp + movgr2fr.w $fa0, $s0 ffint.d.w $fa0, $fa0 frecip.d $fa0, $fa0 - vst $vr0, $sp, 272 # 16-byte Folded Spill + vst $vr0, $sp, 224 # 16-byte Folded Spill vreplvei.d $vr0, $vr0, 0 - vst $vr0, $sp, 256 # 16-byte Folded Spill + vst $vr0, $sp, 208 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_0) fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) fst.d $fa0, $sp, 16 # 8-byte Folded Spill - ori $a0, $a2, 16 - st.d $a0, $sp, 40 # 8-byte Folded Spill + ori $s6, $a2, 16 movgr2fr.d $fa0, $zero - fst.d $fa0, $sp, 56 # 8-byte Folded Spill + fst.d $fa0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_1) fld.d $fa0, $a0, %pc_lo12(.LCPI1_1) - fst.d $fa0, $sp, 248 # 8-byte Folded Spill + fst.d $fa0, $sp, 200 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_2) fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - fst.d $fa0, $sp, 240 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_3) fld.d $fs4, $a0, %pc_lo12(.LCPI1_3) pcalau12i $a0, %pc_hi20(.LCPI1_4) vld $vr0, $a0, %pc_lo12(.LCPI1_4) - vst $vr0, $sp, 224 # 16-byte Folded Spill + vst $vr0, $sp, 176 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_5) vld $vr0, $a0, %pc_lo12(.LCPI1_5) - vst $vr0, $sp, 208 # 16-byte Folded Spill + vst $vr0, $sp, 160 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_6) vld $vr0, $a0, %pc_lo12(.LCPI1_6) - vst $vr0, $sp, 192 # 16-byte Folded Spill + vst $vr0, $sp, 144 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_7) fld.d $fs5, $a0, %pc_lo12(.LCPI1_7) ori $a0, $zero, 0 lu32i.d $a0, 98304 lu52i.d $a0, $a0, 1030 vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 176 # 16-byte Folded Spill + vst $vr0, $sp, 128 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_8) fld.d $fs6, $a0, %pc_lo12(.LCPI1_8) pcalau12i $a0, %pc_hi20(.LCPI1_9) vld $vr0, $a0, %pc_lo12(.LCPI1_9) - vst $vr0, $sp, 160 # 16-byte Folded Spill + vst $vr0, $sp, 112 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI1_10) fld.d $fs7, $a0, %pc_lo12(.LCPI1_10) - lu52i.d $s7, $zero, 1023 - lu52i.d $s8, $zero, 1021 - vst $vr3, $sp, 64 # 16-byte Folded Spill + vst $vr3, $sp, 48 # 16-byte Folded Spill b .LBB1_5 .p2align 4, , 16 .LBB1_4: # %.split150.us.us @@ -1218,14 +1215,13 @@ main: # @main # Child Loop BB1_9 Depth 3 # Child Loop BB1_13 Depth 4 # Child Loop BB1_29 Depth 4 - move $s5, $zero - st.w $zero, $sp, 386 + move $s4, $zero + st.w 
$zero, $sp, 338 st.d $a3, $sp, 24 # 8-byte Folded Spill mul.d $a0, $a3, $a3 mul.d $a0, $a0, $a1 - st.h $a0, $sp, 390 - slli.d $a0, $a1, 10 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.h $a0, $sp, 342 + slli.d $s5, $a1, 10 srli.d $a0, $a1, 32 lu52i.d $a3, $zero, 1107 or $a0, $a0, $a3 @@ -1242,64 +1238,58 @@ main: # @main .p2align 4, , 16 .LBB1_6: # %.split141.us.us.us # in Loop: Header=BB1_7 Depth=2 - addi.d $s5, $s5, 1 - move $s1, $s6 - move $a2, $s3 + addi.d $s4, $s4, 1 + move $a2, $s7 ori $a0, $zero, 1024 - beq $s5, $a0, .LBB1_4 + beq $s4, $a0, .LBB1_4 .LBB1_7: # %.split139.us.us.us # Parent Loop BB1_5 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB1_9 Depth 3 # Child Loop BB1_13 Depth 4 # Child Loop BB1_29 Depth 4 - ld.d $a0, $sp, 48 # 8-byte Folded Reload - sub.d $a0, $s5, $a0 - bstrpick.d $a1, $s5, 15, 0 + sub.d $a0, $s4, $s5 + bstrpick.d $a1, $s4, 15, 0 movgr2fr.w $fa0, $a1 ffint.d.w $fs1, $fa0 slli.d $a1, $a0, 4 alsl.d $a0, $a0, $a1, 3 - move $s6, $s1 - add.d $a0, $s1, $a0 - move $s3, $a2 - add.d $s2, $a0, $a2 - ld.d $a1, $sp, 40 # 8-byte Folded Reload - add.d $s0, $a0, $a1 - ori $s4, $zero, 1 + add.d $a0, $fp, $a0 + move $s7, $a2 + add.d $s8, $a0, $a2 + add.d $s3, $a0, $s6 + ori $s2, $zero, 1 vldi $vr0, -928 - vst $vr0, $sp, 288 # 16-byte Folded Spill + vst $vr0, $sp, 240 # 16-byte Folded Spill b .LBB1_9 .p2align 4, , 16 .LBB1_8: # %._crit_edge.us.us.us.us.1 # in Loop: Header=BB1_9 Depth=3 vrepli.b $vr0, 0 - vld $vr5, $sp, 128 # 16-byte Folded Reload - vfcmp.clt.d $vr1, $vr5, $vr4 - vldi $vr2, -912 - fcmp.clt.d $fcc0, $fa2, $fs2 - fsel $fa2, $fs2, $fa2, $fcc0 - vldi $vr3, -944 - fmul.d $fa2, $fa2, $fa3 + vldi $vr4, -912 + vfcmp.clt.d $vr1, $vr4, $vr6 + fcmp.clt.d $fcc0, $fa4, $fs2 + fsel $fa2, $fs2, $fa4, $fcc0 + vldi $vr5, -944 + fmul.d $fa2, $fa2, $fa5 movgr2fr.d $fa3, $zero fcmp.clt.d $fcc0, $fs2, $fa3 fsel $fa2, $fa2, $fa3, $fcc0 - fld.d $fa3, $sp, 88 # 8-byte Folded Reload + fld.d $fa3, $sp, 72 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa3 - vbitsel.v $vr1, $vr4, $vr5, $vr1 - vld $vr3, $sp, 112 # 16-byte Folded Reload - vfmul.d $vr1, $vr1, $vr3 - vfcmp.cule.d $vr0, $vr0, $vr4 + vbitsel.v $vr1, $vr6, $vr4, $vr1 + vfmul.d $vr1, $vr1, $vr5 + vfcmp.cule.d $vr0, $vr0, $vr6 vand.v $vr0, $vr0, $vr1 - vld $vr1, $sp, 96 # 16-byte Folded Reload + vld $vr1, $sp, 80 # 16-byte Folded Reload vfadd.d $vr0, $vr0, $vr1 - vst $vr0, $s2, 0 - fst.d $fa2, $s0, 0 - andi $a0, $s4, 1 + vst $vr0, $s8, 0 + fst.d $fa2, $s3, 0 + andi $a0, $s2, 1 vldi $vr0, -904 - vst $vr0, $sp, 288 # 16-byte Folded Spill - move $s4, $zero - vld $vr3, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 240 # 16-byte Folded Spill + move $s2, $zero + vld $vr3, $sp, 48 # 16-byte Folded Reload beqz $a0, .LBB1_6 .LBB1_9: # %.preheader126.us.us.us # Parent Loop BB1_5 Depth=1 @@ -1307,8 +1297,8 @@ main: # @main # => This Loop Header: Depth=3 # Child Loop BB1_13 Depth 4 # Child Loop BB1_29 Depth 4 - move $s1, $fp - fld.d $fs3, $sp, 56 # 8-byte Folded Reload + move $s1, $s0 + fld.d $fs3, $sp, 40 # 8-byte Folded Reload b .LBB1_13 .p2align 4, , 16 .LBB1_10: # in Loop: Header=BB1_13 Depth=4 @@ -1324,34 +1314,34 @@ main: # @main fadd.d $fa1, $fs2, $fa2 fmul.d $fa1, $fa1, $fa2 fadd.d $fa1, $fa1, $fs1 - fld.d $fa3, $sp, 248 # 8-byte Folded Reload + fld.d $fa3, $sp, 200 # 8-byte Folded Reload fmul.d $fa1, $fa1, $fa3 vldi $vr3, -800 fadd.d $fa1, $fa1, $fa3 - vld $vr4, $sp, 288 # 16-byte Folded Reload + vld $vr4, $sp, 240 # 16-byte Folded Reload fadd.d $fa0, $fa4, $fa0 fmul.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fs0 - fld.d $fa2, 
$sp, 240 # 8-byte Folded Reload + fld.d $fa2, $sp, 192 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fa3 fmul.d $fa2, $fa0, $fs4 vreplvei.d $vr1, $vr1, 0 - vld $vr3, $sp, 224 # 16-byte Folded Reload + vld $vr3, $sp, 176 # 16-byte Folded Reload vfmul.d $vr1, $vr1, $vr3 vreplvei.d $vr0, $vr0, 0 - vld $vr3, $sp, 208 # 16-byte Folded Reload + vld $vr3, $sp, 160 # 16-byte Folded Reload vfmul.d $vr0, $vr0, $vr3 vfadd.d $vr0, $vr1, $vr0 vreplvei.d $vr1, $vr1, 1 fadd.d $fa1, $fa1, $fa2 - vld $vr2, $sp, 192 # 16-byte Folded Reload + vld $vr2, $sp, 144 # 16-byte Folded Reload vfadd.d $vr0, $vr0, $vr2 fadd.d $fa1, $fa1, $fs5 - vld $vr2, $sp, 176 # 16-byte Folded Reload + vld $vr2, $sp, 128 # 16-byte Folded Reload vfmul.d $vr2, $vr0, $vr2 fmul.d $fa3, $fa1, $fs6 - vld $vr4, $sp, 160 # 16-byte Folded Reload + vld $vr4, $sp, 112 # 16-byte Folded Reload vfadd.d $vr2, $vr2, $vr4 fadd.d $fa3, $fa3, $fs7 vreplvei.d $vr4, $vr0, 1 @@ -1363,24 +1353,24 @@ main: # @main fmul.d $fa0, $fa0, $fa5 fmul.d $fa4, $fa4, $fa5 fmul.d $fa1, $fa1, $fa5 - vst $vr2, $sp, 304 - fst.d $fa3, $sp, 320 - fst.d $fa0, $sp, 328 - fst.d $fa4, $sp, 336 - fst.d $fa1, $sp, 344 - addi.d $a0, $sp, 352 - addi.d $a1, $sp, 304 - addi.d $a3, $sp, 386 + vst $vr2, $sp, 256 + fst.d $fa3, $sp, 272 + fst.d $fa0, $sp, 280 + fst.d $fa4, $sp, 288 + fst.d $fa1, $sp, 296 + addi.d $a0, $sp, 304 + addi.d $a1, $sp, 256 + addi.d $a3, $sp, 338 move $a2, $zero pcaddu18i $ra, %call36(_Z8radianceRK3RayiPt) jirl $ra, $ra, 0 - vld $vr0, $sp, 352 - fld.d $fa1, $sp, 368 - vld $vr2, $sp, 256 # 16-byte Folded Reload + vld $vr0, $sp, 304 + fld.d $fa1, $sp, 320 + vld $vr2, $sp, 208 # 16-byte Folded Reload vfmul.d $vr0, $vr2, $vr0 - vld $vr2, $sp, 272 # 16-byte Folded Reload + vld $vr2, $sp, 224 # 16-byte Folded Reload fmul.d $fa1, $fa2, $fa1 - vld $vr3, $sp, 144 # 16-byte Folded Reload + vld $vr3, $sp, 96 # 16-byte Folded Reload vfadd.d $vr3, $vr3, $vr0 addi.w $s1, $s1, -1 fadd.d $fs3, $fs3, $fa1 @@ -1389,8 +1379,8 @@ main: # @main # Parent Loop BB1_7 Depth=2 # Parent Loop BB1_9 Depth=3 # => This Inner Loop Header: Depth=4 - vst $vr3, $sp, 144 # 16-byte Folded Spill - addi.d $a0, $sp, 386 + vst $vr3, $sp, 96 # 16-byte Folded Spill + addi.d $a0, $sp, 338 pcaddu18i $ra, %call36(erand48) jirl $ra, $ra, 0 fadd.d $fa0, $fa0, $fa0 @@ -1417,7 +1407,7 @@ main: # @main # in Loop: Header=BB1_13 Depth=4 fsub.d $fs2, $fa2, $fa0 .LBB1_18: # in Loop: Header=BB1_13 Depth=4 - addi.d $a0, $sp, 386 + addi.d $a0, $sp, 338 pcaddu18i $ra, %call36(erand48) jirl $ra, $ra, 0 fadd.d $fa0, $fa0, $fa0 @@ -1464,33 +1454,29 @@ main: # @main .p2align 4, , 16 .LBB1_25: # %._crit_edge.us.us.us.us # in Loop: Header=BB1_9 Depth=3 - vrepli.b $vr4, 0 - vreplgr2vr.d $vr2, $s7 + vrepli.b $vr6, 0 + vldi $vr2, -912 vfcmp.clt.d $vr0, $vr2, $vr3 - vldi $vr1, -912 - fcmp.clt.d $fcc0, $fa1, $fs3 - fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fs3 + fsel $fa1, $fs3, $fa2, $fcc0 movgr2fr.d $fs2, $zero fcmp.clt.d $fcc0, $fs3, $fs2 - vst $vr2, $sp, 128 # 16-byte Folded Spill vbitsel.v $vr0, $vr3, $vr2, $vr0 - vreplgr2vr.d $vr2, $s8 - vst $vr2, $sp, 112 # 16-byte Folded Spill - vfmul.d $vr0, $vr0, $vr2 - vld $vr2, $s2, 0 - vfcmp.cule.d $vr3, $vr4, $vr3 - vand.v $vr0, $vr3, $vr0 - fld.d $fa3, $s0, 0 - vfadd.d $vr2, $vr0, $vr2 - vldi $vr0, -944 - fmul.d $fa0, $fa1, $fa0 - fsel $fa0, $fa0, $fs2, $fcc0 - fadd.d $fa0, $fa0, $fa3 - vst $vr2, $sp, 96 # 16-byte Folded Spill - vst $vr2, $s2, 0 - fst.d $fa0, $sp, 88 # 8-byte Folded Spill - fst.d $fa0, $s0, 0 - move $s1, $fp + vldi $vr4, -944 + 
vfmul.d $vr0, $vr0, $vr4 + vfcmp.cule.d $vr2, $vr6, $vr3 + vand.v $vr0, $vr2, $vr0 + vld $vr2, $s8, 0 + fld.d $fa3, $s3, 0 + fmul.d $fa1, $fa1, $fa4 + fsel $fa1, $fa1, $fs2, $fcc0 + vfadd.d $vr0, $vr0, $vr2 + fadd.d $fa1, $fa1, $fa3 + vst $vr0, $sp, 80 # 16-byte Folded Spill + vst $vr0, $s8, 0 + fst.d $fa1, $sp, 72 # 8-byte Folded Spill + fst.d $fa1, $s3, 0 + move $s1, $s0 b .LBB1_29 .p2align 4, , 16 .LBB1_26: # in Loop: Header=BB1_29 Depth=4 @@ -1507,34 +1493,34 @@ main: # @main vldi $vr2, -928 fmul.d $fa1, $fa1, $fa2 fadd.d $fa1, $fa1, $fs1 - fld.d $fa3, $sp, 248 # 8-byte Folded Reload + fld.d $fa3, $sp, 200 # 8-byte Folded Reload fmul.d $fa1, $fa1, $fa3 vldi $vr3, -800 fadd.d $fa1, $fa1, $fa3 - vld $vr4, $sp, 288 # 16-byte Folded Reload + vld $vr4, $sp, 240 # 16-byte Folded Reload fadd.d $fa0, $fa4, $fa0 fmul.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fs0 - fld.d $fa2, $sp, 240 # 8-byte Folded Reload + fld.d $fa2, $sp, 192 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fa3 fmul.d $fa2, $fa0, $fs4 vreplvei.d $vr1, $vr1, 0 - vld $vr3, $sp, 224 # 16-byte Folded Reload + vld $vr3, $sp, 176 # 16-byte Folded Reload vfmul.d $vr1, $vr1, $vr3 vreplvei.d $vr0, $vr0, 0 - vld $vr3, $sp, 208 # 16-byte Folded Reload + vld $vr3, $sp, 160 # 16-byte Folded Reload vfmul.d $vr0, $vr0, $vr3 vfadd.d $vr0, $vr1, $vr0 vreplvei.d $vr1, $vr1, 1 fadd.d $fa1, $fa1, $fa2 - vld $vr2, $sp, 192 # 16-byte Folded Reload + vld $vr2, $sp, 144 # 16-byte Folded Reload vfadd.d $vr0, $vr0, $vr2 fadd.d $fa1, $fa1, $fs5 - vld $vr2, $sp, 176 # 16-byte Folded Reload + vld $vr2, $sp, 128 # 16-byte Folded Reload vfmul.d $vr2, $vr0, $vr2 fmul.d $fa3, $fa1, $fs6 - vld $vr4, $sp, 160 # 16-byte Folded Reload + vld $vr4, $sp, 112 # 16-byte Folded Reload vfadd.d $vr2, $vr2, $vr4 fadd.d $fa3, $fa3, $fs7 vreplvei.d $vr4, $vr0, 1 @@ -1546,25 +1532,25 @@ main: # @main fmul.d $fa0, $fa0, $fa5 fmul.d $fa4, $fa4, $fa5 fmul.d $fa1, $fa1, $fa5 - vst $vr2, $sp, 304 - fst.d $fa3, $sp, 320 - fst.d $fa0, $sp, 328 - fst.d $fa4, $sp, 336 - fst.d $fa1, $sp, 344 - addi.d $a0, $sp, 352 - addi.d $a1, $sp, 304 - addi.d $a3, $sp, 386 + vst $vr2, $sp, 256 + fst.d $fa3, $sp, 272 + fst.d $fa0, $sp, 280 + fst.d $fa4, $sp, 288 + fst.d $fa1, $sp, 296 + addi.d $a0, $sp, 304 + addi.d $a1, $sp, 256 + addi.d $a3, $sp, 338 move $a2, $zero pcaddu18i $ra, %call36(_Z8radianceRK3RayiPt) jirl $ra, $ra, 0 - vld $vr0, $sp, 352 - fld.d $fa1, $sp, 368 - vld $vr2, $sp, 256 # 16-byte Folded Reload + vld $vr0, $sp, 304 + fld.d $fa1, $sp, 320 + vld $vr2, $sp, 208 # 16-byte Folded Reload vfmul.d $vr0, $vr2, $vr0 - vld $vr2, $sp, 272 # 16-byte Folded Reload + vld $vr2, $sp, 224 # 16-byte Folded Reload fmul.d $fa1, $fa2, $fa1 - vld $vr4, $sp, 144 # 16-byte Folded Reload - vfadd.d $vr4, $vr4, $vr0 + vld $vr6, $sp, 96 # 16-byte Folded Reload + vfadd.d $vr6, $vr6, $vr0 addi.w $s1, $s1, -1 fadd.d $fs2, $fs2, $fa1 beqz $s1, .LBB1_8 @@ -1572,8 +1558,8 @@ main: # @main # Parent Loop BB1_7 Depth=2 # Parent Loop BB1_9 Depth=3 # => This Inner Loop Header: Depth=4 - vst $vr4, $sp, 144 # 16-byte Folded Spill - addi.d $a0, $sp, 386 + vst $vr6, $sp, 96 # 16-byte Folded Spill + addi.d $a0, $sp, 338 pcaddu18i $ra, %call36(erand48) jirl $ra, $ra, 0 fadd.d $fa0, $fa0, $fa0 @@ -1600,7 +1586,7 @@ main: # @main # in Loop: Header=BB1_29 Depth=4 fsub.d $fs3, $fa2, $fa0 .LBB1_34: # in Loop: Header=BB1_29 Depth=4 - addi.d $a0, $sp, 386 + addi.d $a0, $sp, 338 pcaddu18i $ra, %call36(erand48) jirl $ra, $ra, 0 fadd.d $fa0, $fa0, $fa0 @@ -1647,7 +1633,7 @@ main: # @main .LBB1_41: # 
%.split148.preheader move $a0, $zero ori $a1, $a2, 16 - add.d $a1, $s1, $a1 + add.d $a1, $fp, $a1 movgr2fr.d $fa0, $zero lu12i.w $a2, -6 ori $a3, $zero, 768 @@ -1677,26 +1663,26 @@ main: # @main bne $a0, $a3, .LBB1_42 .LBB1_45: # %.split154.us move $a0, $zero - fld.d $fs7, $sp, 392 # 8-byte Folded Reload - fld.d $fs6, $sp, 400 # 8-byte Folded Reload - fld.d $fs5, $sp, 408 # 8-byte Folded Reload - fld.d $fs4, $sp, 416 # 8-byte Folded Reload - fld.d $fs3, $sp, 424 # 8-byte Folded Reload - fld.d $fs2, $sp, 432 # 8-byte Folded Reload - fld.d $fs1, $sp, 440 # 8-byte Folded Reload - fld.d $fs0, $sp, 448 # 8-byte Folded Reload - ld.d $s8, $sp, 456 # 8-byte Folded Reload - ld.d $s7, $sp, 464 # 8-byte Folded Reload - ld.d $s6, $sp, 472 # 8-byte Folded Reload - ld.d $s5, $sp, 480 # 8-byte Folded Reload - ld.d $s4, $sp, 488 # 8-byte Folded Reload - ld.d $s3, $sp, 496 # 8-byte Folded Reload - ld.d $s2, $sp, 504 # 8-byte Folded Reload - ld.d $s1, $sp, 512 # 8-byte Folded Reload - ld.d $s0, $sp, 520 # 8-byte Folded Reload - ld.d $fp, $sp, 528 # 8-byte Folded Reload - ld.d $ra, $sp, 536 # 8-byte Folded Reload - addi.d $sp, $sp, 544 + fld.d $fs7, $sp, 344 # 8-byte Folded Reload + fld.d $fs6, $sp, 352 # 8-byte Folded Reload + fld.d $fs5, $sp, 360 # 8-byte Folded Reload + fld.d $fs4, $sp, 368 # 8-byte Folded Reload + fld.d $fs3, $sp, 376 # 8-byte Folded Reload + fld.d $fs2, $sp, 384 # 8-byte Folded Reload + fld.d $fs1, $sp, 392 # 8-byte Folded Reload + fld.d $fs0, $sp, 400 # 8-byte Folded Reload + ld.d $s8, $sp, 408 # 8-byte Folded Reload + ld.d $s7, $sp, 416 # 8-byte Folded Reload + ld.d $s6, $sp, 424 # 8-byte Folded Reload + ld.d $s5, $sp, 432 # 8-byte Folded Reload + ld.d $s4, $sp, 440 # 8-byte Folded Reload + ld.d $s3, $sp, 448 # 8-byte Folded Reload + ld.d $s2, $sp, 456 # 8-byte Folded Reload + ld.d $s1, $sp, 464 # 8-byte Folded Reload + ld.d $s0, $sp, 472 # 8-byte Folded Reload + ld.d $fp, $sp, 480 # 8-byte Folded Reload + ld.d $ra, $sp, 488 # 8-byte Folded Reload + addi.d $sp, $sp, 496 ret .Lfunc_end1: .size main, .Lfunc_end1-main diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-comp-goto-1.dir/comp-goto-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-comp-goto-1.dir/comp-goto-1.s index ada5e996..59b5dd81 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-comp-goto-1.dir/comp-goto-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-comp-goto-1.dir/comp-goto-1.s @@ -39,9 +39,7 @@ simulator_kernel: # @simulator_kernel addi.d $t0, $a6, 16 vinsgr2vr.w $vr0, $a5, 0 vinsgr2vr.w $vr0, $a5, 1 - lu12i.w $t1, 63 - ori $t1, $t1, 4095 - vreplgr2vr.w $vr1, $t1 + vldi $vr1, -2301 vrepli.b $vr2, 0 lu12i.w $t1, -64 vreplgr2vr.d $vr3, $t1 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-1.dir/pr36034-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-1.dir/pr36034-1.s index cde45b95..ef20815e 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-1.dir/pr36034-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-1.dir/pr36034-1.s @@ -56,8 +56,7 @@ main: # @main xvld $xr2, $a0, 32 xvld $xr3, $a0, 64 xvld $xr4, $a0, 96 - lu52i.d $a1, $zero, -1025 - xvreplgr2vr.d $xr1, $a1 + xvldi $xr1, -784 xvfcmp.ceq.d $xr5, $xr0, $xr1 xvpickve2gr.d $a1, $xr5, 0 vinsgr2vr.b $vr0, $a1, 
diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-2.dir/pr36034-2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-2.dir/pr36034-2.s
index da0d1405..8b7c8343 100644
--- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-2.dir/pr36034-2.s
+++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr36034-2.dir/pr36034-2.s
@@ -56,8 +56,7 @@ main: # @main
xvld $xr2, $a0, 32
xvld $xr3, $a0, 64
xvld $xr4, $a0, 96
- lu52i.d $a1, $zero, -1025
- xvreplgr2vr.d $xr1, $a1
+ xvldi $xr1, -784
xvfcmp.ceq.d $xr5, $xr0, $xr1
xvpickve2gr.d $a1, $xr5, 0
vinsgr2vr.b $vr0, $a1, 0
diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr37573.dir/pr37573.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr37573.dir/pr37573.s
index 8199c939..759f81ab 100644
--- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr37573.dir/pr37573.s
+++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr37573.dir/pr37573.s
@@ -233,25 +233,24 @@ foo: # @foo
b .LBB2_5
.LBB2_2: # %vector.ph
ld.w $a2, $a0, 12
- move $a4, $zero
+ move $a3, $zero
addi.d $a1, $a0, 12
xvinsgr2vr.w $xr0, $a2, 7
lu12i.w $a2, 524287
ori $a2, $a2, 4094
xvreplgr2vr.w $xr1, $a2
- lu12i.w $a3, -524288
- xvreplgr2vr.w $xr2, $a3
+ xvldi $xr2, -3200
xvrepli.w $xr3, 1
lu12i.w $a2, -421749
ori $a2, $a2, 223
xvreplgr2vr.w $xr4, $a2
- ori $a5, $zero, 896
+ ori $a4, $zero, 896
.p2align 4, , 16
.LBB2_3: # %vector.body
# =>This Inner Loop Header: Depth=1
xvori.b $xr5, $xr0, 0
- add.d $a6, $a0, $a4
- xvld $xr0, $a6, 16
+ add.d $a5, $a0, $a3
+ xvld $xr0, $a5, 16
xvpickve.w $xr5, $xr5, 7
xvinsve0.w $xr5, $xr5, 0
xvinsve0.w $xr5, $xr0, 1
@@ -271,23 +270,24 @@ foo: # @foo
xvand.v $xr5, $xr5, $xr2
xvor.v $xr5, $xr6, $xr5
xvsrli.w $xr5, $xr5, 1
- xvld $xr6, $a6, 1600
+ xvld $xr6, $a5, 1600
xvand.v $xr7, $xr0, $xr3
xvseqi.w $xr7, $xr7, 0
xvandn.v $xr7, $xr7, $xr4
xvxor.v $xr6, $xr7, $xr6
xvxor.v $xr5, $xr6, $xr5
- addi.d $a4, $a4, 32
- xvst $xr5, $a6, 12
- bne $a4, $a5, .LBB2_3
+ addi.d $a3, $a3, 32
+ xvst $xr5, $a5, 12
+ bne $a3, $a4, .LBB2_3
# %bb.4: # %.loopexit.loopexit
+ xvpickve2gr.w $a3, $xr0, 7
ld.wu $a4, $a0, 912
- xvpickve2gr.w $a5, $xr0, 7
- lu32i.d $a3, 0
- and $a5, $a5, $a3
+ lu12i.w $a5, -524288
+ lu32i.d $a5, 0
+ and $a3, $a3, $a5
srli.d $a6, $a4, 1
- bstrins.d $a5, $a6, 30, 1
- srli.d $a5, $a5, 1
+ bstrins.d $a3, $a6, 30, 1
+ srli.d $a3, $a3, 1
andi $a6, $a4, 1
ori $a7, $zero, 2496
ldx.w $a7, $a0, $a7
@@ -296,22 +296,22 @@ foo: # @foo
and $a6, $a6, $a2
xor $a6, $a6, $a7
ld.wu $a7, $a0, 916
- xor $a5, $a6, $a5
- st.w $a5, $a0, 908
- and $a4, $a4, $a3
- srli.d $a5, $a7, 1
- bstrins.d $a4, $a5, 30, 1
- srli.d $a4, $a4, 1
- ori $a5, $zero, 2500
- ldx.w $a5, $a0, $a5
+ xor $a3, $a6, $a3
+ st.w $a3, $a0, 908
+ and $a3, $a4, $a5
+ srli.d $a4, $a7, 1
+ bstrins.d $a3, $a4, 30, 1
+ srli.d $a3, $a3, 1
+ ori $a4, $zero, 2500
+ ldx.w $a4, $a0, $a4
andi $a6, $a7, 1
sub.d $a6, $zero, $a6
and $a6, $a6, $a2
- xor $a5, $a6, $a5
+ xor $a4, $a6, $a4
ld.wu $a6, $a0, 920
- xor $a4, $a5, $a4
- st.w $a4, $a0, 912
- and $a3, $a7, $a3
+ xor $a3, $a4, $a3
+ st.w $a3, $a0, 912
+ and $a3, $a7, $a5
srli.d $a4, $a6, 1
bstrins.d $a3, $a4, 30, 1
srli.d $a3, $a3, 1
diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr53645-2.dir/pr53645-2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr53645-2.dir/pr53645-2.s
index 92b8dbef..dd6e3a72 100644
--- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr53645-2.dir/pr53645-2.s
+++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr53645-2.dir/pr53645-2.s
@@ -331,8 +331,7 @@ sq65656565: # @sq65656565
ori $a1, $a1, 2731
vreplgr2vr.w $vr1, $a1
vmuh.h $vr0, $vr0, $vr1
- lu12i.w $a1, 16
- vreplgr2vr.w $vr1, $a1
+ vldi $vr1, -3583
vsra.h $vr0, $vr0, $vr1
vsrli.h $vr1, $vr0, 15
vadd.h $vr0, $vr0, $vr1
@@ -351,8 +350,7 @@ sr65656565: # @sr65656565
ori $a1, $a1, 2731
vreplgr2vr.w $vr1, $a1
vmuh.h $vr1, $vr0, $vr1
- lu12i.w $a1, 16
- vreplgr2vr.w $vr2, $a1
+ vldi $vr2, -3583
vsra.h $vr1, $vr1, $vr2
vsrli.h $vr2, $vr1, 15
vadd.h $vr1, $vr1, $vr2
diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr56837.dir/pr56837.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr56837.dir/pr56837.s
index c0f0f4cb..3af02907 100644
--- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr56837.dir/pr56837.s
+++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr56837.dir/pr56837.s
@@ -8,9 +8,7 @@ foo: # @foo
lu12i.w $a0, -2
pcalau12i $a1, %pc_hi20(a)
addi.d $a1, $a1, %pc_lo12(a)
- addi.w $a2, $zero, -1
- lu32i.d $a2, 0
- vreplgr2vr.d $vr0, $a2
+ vldi $vr0, -1777
lu12i.w $a2, 2
.p2align 4, , 16
.LBB0_1: # %vector.body
diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr50310.dir/pr50310.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr50310.dir/pr50310.s
index db9e1cbc..a1d16e2c 100644
--- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr50310.dir/pr50310.s
+++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr50310.dir/pr50310.s
@@ -10,8 +10,7 @@ foo: # @foo
pcalau12i $a0, %pc_hi20(s2)
xvld $xr1, $a0, %pc_lo12(s2)
xvfcmp.clt.d $xr2, $xr1, $xr0
- lu52i.d $a0, $zero, -1025
- xvreplgr2vr.d $xr3, $a0
+ xvldi $xr3, -784
xvand.v $xr2, $xr2, $xr3
pcalau12i $a0, %pc_hi20(s3)
addi.d $a0, $a0, %pc_lo12(s3)
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-1.dir/lasx-xvabsd-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-1.dir/lasx-xvabsd-1.s
index f1290169..5f724cd3 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-1.dir/lasx-xvabsd-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-1.dir/lasx-xvabsd-1.s
@@ -1437,8 +1437,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_47)
xvld $xr1, $a0, %pc_lo12(.LCPI2_47)
xvst $xr0, $sp, 96
- lu12i.w $a0, 4
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3776
xvabsd.w $xr0, $xr1, $xr0
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
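At the intrinsics level the two forms are interchangeable whenever the value fits one of vldi's immediate patterns; a minimal sketch, assuming the <lasxintrin.h> shipped by GCC/Clang for LoongArch (the function names are illustrative):

    #include <lasxintrin.h>

    /* Both produce 0x00004000 in every 32-bit lane; the xvldi form needs
       no scratch GPR. -3776 is the encoding chosen for this splat in the
       lasx-xvabsd-1.s hunk above. */
    __m256i splat_via_gpr(void) { return __lasx_xvreplgr2vr_w(0x4000); }
    __m256i splat_via_imm(void) { return __lasx_xvldi(-3776); }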
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-2.dir/lasx-xvabsd-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-2.dir/lasx-xvabsd-2.s
index 256fbc32..956edef0 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-2.dir/lasx-xvabsd-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvabsd-2.dir/lasx-xvabsd-2.s
@@ -851,13 +851,11 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_17)
xvld $xr0, $a0, %pc_lo12(.LCPI2_17)
- xvst $xr0, $sp, 128
pcalau12i $a0, %pc_hi20(.LCPI2_18)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_18)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr1, $a0
- xvabsd.wu $xr0, $xr0, $xr1
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_18)
+ xvst $xr0, $sp, 128
+ xvldi $xr0, -1789
+ xvabsd.wu $xr0, $xr1, $xr0
xvst $xr0, $sp, 160
addi.d $a0, $sp, 128
addi.d $a1, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadd.dir/lasx-xvadd.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadd.dir/lasx-xvadd.s
index f8f9e3b6..312b4a54 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadd.dir/lasx-xvadd.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadd.dir/lasx-xvadd.s
@@ -778,13 +778,11 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_22)
xvld $xr0, $a0, %pc_lo12(.LCPI2_22)
- xvst $xr0, $sp, 128
pcalau12i $a0, %pc_hi20(.LCPI2_23)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_23)
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
- xvadd.q $xr0, $xr1, $xr0
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_23)
+ xvst $xr0, $sp, 128
+ xvldi $xr0, -1552
+ xvadd.q $xr0, $xr0, $xr1
xvst $xr0, $sp, 160
addi.d $a0, $sp, 128
addi.d $a1, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadda.dir/lasx-xvadda.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadda.dir/lasx-xvadda.s
index a4619118..742ac92a 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadda.dir/lasx-xvadda.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvadda.dir/lasx-xvadda.s
@@ -881,8 +881,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 256
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3568
xvst $xr0, $sp, 192
xvld $xr1, $sp, 160 # 32-byte Folded Reload
xvadda.b $xr0, $xr0, $xr1
@@ -979,8 +978,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_12)
xvld $xr1, $a0, %pc_lo12(.LCPI2_12)
xvst $xr0, $sp, 192
- lu12i.w $a0, 4
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3776
xvadda.b $xr0, $xr0, $xr1
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
@@ -1402,12 +1400,11 @@ main: # @main
lu32i.d $a0, 4
xvreplgr2vr.d $xr0, $a0
xvst $xr0, $sp, 192
- lu12i.w $a0, -4096
- xvreplgr2vr.d $xr0, $a0
lu12i.w $a0, 4096
lu32i.d $a0, 4
- xvreplgr2vr.d $xr1, $a0
- xvadda.d $xr0, $xr0, $xr1
+ xvreplgr2vr.d $xr0, $a0
+ xvldi $xr1, -1544
+ xvadda.d $xr0, $xr1, $xr0
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
addi.d $a1, $sp, 224
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-1.dir/lasx-xvaddwev-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-1.dir/lasx-xvaddwev-1.s
index 29b774eb..f63a617c 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-1.dir/lasx-xvaddwev-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-1.dir/lasx-xvaddwev-1.s
@@ -1359,8 +1359,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_38)
xvld $xr1, $a0, %pc_lo12(.LCPI2_38)
xvst $xr0, $sp, 224
- lu12i.w $a0, 256
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3568
xvaddwev.d.w $xr0, $xr0, $xr1
xvst $xr0, $sp, 256
addi.d $a0, $sp, 224
@@ -1626,8 +1625,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_65)
xvld $xr0, $a0, %pc_lo12(.LCPI2_65)
xvst $xr0, $sp, 224
- ori $a0, $zero, 512
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3838
xvld $xr1, $sp, 192 # 32-byte Folded Reload
xvaddwev.q.d $xr0, $xr0, $xr1
xvst $xr0, $sp, 256
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-2.dir/lasx-xvaddwev-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-2.dir/lasx-xvaddwev-2.s
index 3f7a1342..9e37a95f 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-2.dir/lasx-xvaddwev-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-2.dir/lasx-xvaddwev-2.s
@@ -1033,13 +1033,11 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_9)
xvld $xr0, $a0, %pc_lo12(.LCPI2_9)
- xvst $xr0, $sp, 96
pcalau12i $a0, %pc_hi20(.LCPI2_10)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_10)
- lu12i.w $a0, 7
- ori $a0, $a0, 3072
- xvreplgr2vr.h $xr1, $a0
- xvaddwev.h.bu $xr0, $xr0, $xr1
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_10)
+ xvst $xr0, $sp, 96
+ xvldi $xr0, -2692
+ xvaddwev.h.bu $xr0, $xr1, $xr0
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
addi.d $a1, $sp, 128
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-3.dir/lasx-xvaddwev-3.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-3.dir/lasx-xvaddwev-3.s
index 81b20ef2..7772538f 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-3.dir/lasx-xvaddwev-3.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwev-3.dir/lasx-xvaddwev-3.s
@@ -750,9 +750,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 1
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2529
xvst $xr0, $sp, 96
xvld $xr1, $sp, 64 # 32-byte Folded Reload
xvaddwev.w.hu.h $xr0, $xr1, $xr0
@@ -807,9 +805,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvst $xr0, $sp, 96
xvrepli.b $xr1, -1
xvst $xr1, $sp, 32 # 32-byte Folded Spill
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-2.dir/lasx-xvaddwod-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-2.dir/lasx-xvaddwod-2.s
index 8959db90..1fd69118 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-2.dir/lasx-xvaddwod-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-2.dir/lasx-xvaddwod-2.s
@@ -1145,9 +1145,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvst $xr0, $sp, 160
xvrepli.d $xr0, -18
xvld $xr1, $sp, 128 # 32-byte Folded Reload
@@ -1195,12 +1193,10 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1789
xvst $xr0, $sp, 96 # 32-byte Folded Spill
xvst $xr0, $sp, 160
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvld $xr1, $sp, 128 # 32-byte Folded Reload
xvaddwod.d.wu $xr0, $xr1, $xr0
xvst $xr0, $sp, 192
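Note in the xvaddwod-2 hunk above that the same 0xFFFF payload takes two different immediates depending on element width: -1789 for the 64-bit-element splat and -2305 for the 32-bit one. The vldi immediate encodes a pattern (value plus element size), not just raw bits. A sketch under the same <lasxintrin.h> assumption as before:

    #include <lasxintrin.h>

    /* immediates read off the xvaddwod-2 hunk above */
    __m256i ffff_per_dword(void) { return __lasx_xvldi(-1789); } /* 0x000000000000FFFF x4 */
    __m256i ffff_per_word(void)  { return __lasx_xvldi(-2305); } /* 0x0000FFFF x8 */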
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-3.dir/lasx-xvaddwod-3.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-3.dir/lasx-xvaddwod-3.s
index 5bbaecf1..5fca5bfd 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-3.dir/lasx-xvaddwod-3.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvaddwod-3.dir/lasx-xvaddwod-3.s
@@ -923,13 +923,11 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_15)
xvld $xr0, $a0, %pc_lo12(.LCPI2_15)
- xvst $xr0, $sp, 96
pcalau12i $a0, %pc_hi20(.LCPI2_16)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_16)
- lu12i.w $a0, 7
- ori $a0, $a0, 3072
- xvreplgr2vr.h $xr1, $a0
- xvaddwod.w.hu.h $xr0, $xr1, $xr0
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_16)
+ xvst $xr0, $sp, 96
+ xvldi $xr0, -2692
+ xvaddwod.w.hu.h $xr0, $xr0, $xr1
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
addi.d $a1, $sp, 128
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvandi.dir/lasx-xvandi.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvandi.dir/lasx-xvandi.s
index d7eaefac..23d89199 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvandi.dir/lasx-xvandi.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvandi.dir/lasx-xvandi.s
@@ -300,8 +300,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 1024
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2812
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-1.dir/lasx-xvavg-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-1.dir/lasx-xvavg-1.s
index 00673658..fa186176 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-1.dir/lasx-xvavg-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-1.dir/lasx-xvavg-1.s
@@ -849,9 +849,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 0
- lu32i.d $a0, 65535
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1744
xvst $xr0, $sp, 192
xvld $xr1, $sp, 160 # 32-byte Folded Reload
xvavg.b $xr0, $xr0, $xr1
@@ -1123,13 +1121,9 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 7
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2433
xvst $xr0, $sp, 192
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvld $xr1, $sp, 160 # 32-byte Folded Reload
xvavg.w $xr0, $xr0, $xr1
xvst $xr0, $sp, 224
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-2.dir/lasx-xvavg-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-2.dir/lasx-xvavg-2.s
index e9d84959..29942f5c 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-2.dir/lasx-xvavg-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavg-2.dir/lasx-xvavg-2.s
@@ -831,7 +831,6 @@ main: # @main
st.d $ra, $sp, 248 # 8-byte Folded Spill
st.d $fp, $sp, 240 # 8-byte Folded Spill
st.d $s0, $sp, 232 # 8-byte Folded Spill
- st.d $s1, $sp, 224 # 8-byte Folded Spill
addi.d $fp, $sp, 256
bstrins.d $sp, $zero, 4, 0
pcalau12i $a0, %pc_hi20(.LCPI2_0)
@@ -1030,11 +1029,8 @@ main: # @main
lu12i.w $a0, 524279
ori $a0, $a0, 4095
xvreplgr2vr.d $xr0, $a0
- addi.w $s1, $zero, -1
xvst $xr0, $sp, 160
- move $a0, $s1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvld $xr1, $sp, 128 # 32-byte Folded Reload
xvavg.hu $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
@@ -1047,13 +1043,14 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_20)
xvld $xr0, $a0, %pc_lo12(.LCPI2_20)
- pcalau12i $a0, %pc_hi20(.LCPI2_21)
- xvld $xr1, $a0, %pc_lo12(.LCPI2_21)
xvst $xr0, $sp, 160
- lu52i.d $a0, $s1, 2047
- xvreplgr2vr.d $xr0, $a0
- xvst $xr0, $sp, 64 # 32-byte Folded Spill
- xvavg.hu $xr0, $xr1, $xr0
+ pcalau12i $a0, %pc_hi20(.LCPI2_21)
+ xvld $xr0, $a0, %pc_lo12(.LCPI2_21)
+ addi.w $a0, $zero, -1
+ lu52i.d $a0, $a0, 2047
+ xvreplgr2vr.d $xr1, $a0
+ xvst $xr1, $sp, 64 # 32-byte Folded Spill
+ xvavg.hu $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
addi.d $a1, $sp, 192
@@ -1360,7 +1357,6 @@ main: # @main
jirl $ra, $ra, 0
move $a0, $zero
addi.d $sp, $fp, -256
- ld.d $s1, $sp, 224 # 8-byte Folded Reload
ld.d $s0, $sp, 232 # 8-byte Folded Reload
ld.d $fp, $sp, 240 # 8-byte Folded Reload
ld.d $ra, $sp, 248 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavgr-1.dir/lasx-xvavgr-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavgr-1.dir/lasx-xvavgr-1.s
index c0a47229..73e6093d 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavgr-1.dir/lasx-xvavgr-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvavgr-1.dir/lasx-xvavgr-1.s
@@ -1114,8 +1114,7 @@ main: # @main
lu52i.d $a0, $a0, -513
xvreplgr2vr.d $xr0, $a0
xvst $xr0, $sp, 160
- lu52i.d $a0, $zero, -1025
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -784
xvld $xr1, $sp, 128 # 32-byte Folded Reload
xvavgr.h $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitclr.dir/lasx-xvbitclr.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitclr.dir/lasx-xvbitclr.s
index 8608968a..504c0eb6 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitclr.dir/lasx-xvbitclr.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitclr.dir/lasx-xvbitclr.s
@@ -407,8 +407,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 256000
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -1456
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrev.dir/lasx-xvbitrev.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrev.dir/lasx-xvbitrev.s
index b70c0d99..06474d9d 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrev.dir/lasx-xvbitrev.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrev.dir/lasx-xvbitrev.s
@@ -503,8 +503,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 8
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2688
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrevi.dir/lasx-xvbitrevi.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrevi.dir/lasx-xvbitrevi.s
index f85c14c6..ed2bb408 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrevi.dir/lasx-xvbitrevi.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitrevi.dir/lasx-xvbitrevi.s
@@ -405,8 +405,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 2
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2784
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
@@ -547,8 +546,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu52i.d $a0, $zero, 1024
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1024
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitset.dir/lasx-xvbitset.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitset.dir/lasx-xvbitset.s
index 2be10de8..59e1adce 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitset.dir/lasx-xvbitset.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitset.dir/lasx-xvbitset.s
@@ -753,9 +753,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1789
xvst $xr0, $sp, 96
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitseti.dir/lasx-xvbitseti.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitseti.dir/lasx-xvbitseti.s
index 0e334de0..01f0c5dd 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitseti.dir/lasx-xvbitseti.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvbitseti.dir/lasx-xvbitseti.s
@@ -534,8 +534,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 256
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3568
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
@@ -589,8 +588,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 32
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3582
xvst $xr0, $sp, 64
xvst $xr0, $sp, 96
addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvclo.dir/lasx-xvclo.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvclo.dir/lasx-xvclo.s
index 5c9f8469..116319aa 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvclo.dir/lasx-xvclo.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvclo.dir/lasx-xvclo.s
@@ -736,9 +736,7 @@ main: # @main
ori $a0, $a0, 16
xvreplgr2vr.d $xr0, $a0
xvst $xr0, $sp, 96
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvclo.h $xr0, $xr0
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-1.dir/lasx-xvext2xv-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-1.dir/lasx-xvext2xv-1.s
index 9eaee111..57db11e9 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-1.dir/lasx-xvext2xv-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-1.dir/lasx-xvext2xv-1.s
@@ -861,8 +861,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_9)
xvld $xr0, $a0, %pc_lo12(.LCPI2_9)
xvst $xr0, $sp, 192
- ori $a0, $zero, 512
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3838
vext2xv.d.b $xr0, $xr0
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
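Not every constant qualifies: hunks like the lasx-xvclo.s one above keep their ori + xvreplgr2vr.d sequence, presumably because the value has no vldi pattern encoding. Arbitrary or runtime splats still take the GPR route; a sketch with a hypothetical runtime value:

    #include <lasxintrin.h>

    /* a value only known at run time (or with no pattern encoding)
       must still be broadcast from a GPR */
    __m256i splat_any(long long v) { return __lasx_xvreplgr2vr_d(v); }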
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-2.dir/lasx-xvext2xv-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-2.dir/lasx-xvext2xv-2.s
index d27692aa..b2b4a468 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-2.dir/lasx-xvext2xv-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvext2xv-2.dir/lasx-xvext2xv-2.s
@@ -1486,9 +1486,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_27)
xvld $xr0, $a0, %pc_lo12(.LCPI2_27)
xvst $xr0, $sp, 192
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvst $xr0, $sp, 64 # 32-byte Folded Spill
vext2xv.du.bu $xr0, $xr0
xvst $xr0, $sp, 224
@@ -1755,7 +1753,7 @@ main: # @main
lu12i.w $a0, 8
xvreplgr2vr.d $xr0, $a0
xvst $xr0, $sp, 192
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2688
vext2xv.du.hu $xr0, $xr0
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
@@ -1799,9 +1797,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvst $xr0, $sp, 64 # 32-byte Folded Spill
xvst $xr0, $sp, 192
xvrepli.b $xr0, -1
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-1.dir/lasx-xvexth-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-1.dir/lasx-xvexth-1.s
index 315739ed..0f4c66c0 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-1.dir/lasx-xvexth-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-1.dir/lasx-xvexth-1.s
@@ -551,9 +551,7 @@ main: # @main
xvld $xr0, $a0, %pc_lo12(.LCPI2_5)
xvst $xr0, $sp, 32 # 32-byte Folded Spill
xvst $xr0, $sp, 160
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvexth.w.h $xr0, $xr0
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-2.dir/lasx-xvexth-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-2.dir/lasx-xvexth-2.s
index 1f04ee81..0a610c21 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-2.dir/lasx-xvexth-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvexth-2.dir/lasx-xvexth-2.s
@@ -755,7 +755,6 @@ main: # @main
st.d $ra, $sp, 216 # 8-byte Folded Spill
st.d $fp, $sp, 208 # 8-byte Folded Spill
st.d $s0, $sp, 200 # 8-byte Folded Spill
- st.d $s1, $sp, 192 # 8-byte Folded Spill
addi.d $fp, $sp, 224
bstrins.d $sp, $zero, 4, 0
pcalau12i $a0, %pc_hi20(.LCPI2_0)
@@ -997,8 +996,8 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_12)
xvld $xr0, $a0, %pc_lo12(.LCPI2_12)
xvst $xr0, $sp, 128
- lu12i.w $s1, 15
- ori $a0, $s1, 4094
+ lu12i.w $a0, 15
+ ori $a0, $a0, 4094
xvreplgr2vr.d $xr0, $a0
xvexth.wu.hu $xr0, $xr0
xvst $xr0, $sp, 160
@@ -1142,8 +1141,7 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_26)
xvld $xr0, $a0, %pc_lo12(.LCPI2_26)
- ori $a0, $s1, 4095
- xvreplgr2vr.w $xr1, $a0
+ xvldi $xr1, -2305
xvst $xr1, $sp, 128
xvexth.wu.hu $xr0, $xr0
xvst $xr0, $sp, 160
@@ -1416,8 +1414,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_40)
xvld $xr0, $a0, %pc_lo12(.LCPI2_40)
xvst $xr0, $sp, 128
- lu12i.w $a0, 4080
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1788
xvexth.qu.du $xr0, $xr0
xvst $xr0, $sp, 160
addi.d $a0, $sp, 128
@@ -1429,7 +1426,6 @@ main: # @main
jirl $ra, $ra, 0
move $a0, $zero
addi.d $sp, $fp, -224
- ld.d $s1, $sp, 192 # 8-byte Folded Reload
ld.d $s0, $sp, 200 # 8-byte Folded Reload
ld.d $fp, $sp, 208 # 8-byte Folded Reload
ld.d $ra, $sp, 216 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvextrins.dir/lasx-xvextrins.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvextrins.dir/lasx-xvextrins.s
index 9b64444c..3a275838 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvextrins.dir/lasx-xvextrins.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvextrins.dir/lasx-xvextrins.s
@@ -1158,9 +1158,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_41)
xvld $xr0, $a0, %pc_lo12(.LCPI2_41)
xvst $xr0, $sp, 128
- lu12i.w $a0, 31
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2303
xvld $xr1, $sp, 96 # 32-byte Folded Reload
xvextrins.d $xr0, $xr1, 7
xvst $xr0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_d.dir/lasx-xvfadd_d.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_d.dir/lasx-xvfadd_d.s
index db17fcca..ba3ff943 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_d.dir/lasx-xvfadd_d.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_d.dir/lasx-xvfadd_d.s
@@ -372,9 +372,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 7
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2433
xvst $xr0, $sp, 96
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_s.dir/lasx-xvfadd_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_s.dir/lasx-xvfadd_s.s
index 556f231f..02ecc2ef 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_s.dir/lasx-xvfadd_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfadd_s.dir/lasx-xvfadd_s.s
@@ -686,8 +686,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 260096
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -1424
xvst $xr0, $sp, 96
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_d.dir/lasx-xvfclass_d.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_d.dir/lasx-xvfclass_d.s
index d9e7764b..3a946466 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_d.dir/lasx-xvfclass_d.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_d.dir/lasx-xvfclass_d.s
@@ -499,8 +499,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 160 # 32-byte Folded Reload
xvst $xr0, $sp, 192
- lu12i.w $a0, 4
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3776
xvfclass.d $xr0, $xr0
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_s.dir/lasx-xvfclass_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_s.dir/lasx-xvfclass_s.s
index e9cf6ac7..88d8ef99 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_s.dir/lasx-xvfclass_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfclass_s.dir/lasx-xvfclass_s.s
@@ -297,8 +297,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 512
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3838
xvst $xr0, $sp, 64 # 32-byte Folded Spill
xvst $xr0, $sp, 128
xvrepli.b $xr0, 0
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_caf_s.dir/lasx-xvfcmp_caf_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_caf_s.dir/lasx-xvfcmp_caf_s.s
index 1c3af6aa..c3e793bd 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_caf_s.dir/lasx-xvfcmp_caf_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_caf_s.dir/lasx-xvfcmp_caf_s.s
@@ -354,7 +354,6 @@ main: # @main
st.d $ra, $sp, 216 # 8-byte Folded Spill
st.d $fp, $sp, 208 # 8-byte Folded Spill
st.d $s0, $sp, 200 # 8-byte Folded Spill
- st.d $s1, $sp, 192 # 8-byte Folded Spill
addi.d $fp, $sp, 224
bstrins.d $sp, $zero, 4, 0
xvrepli.b $xr0, 0
@@ -441,7 +440,6 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 96 # 32-byte Folded Reload
xvst $xr0, $sp, 128
- ori $s1, $zero, 0
ori $a0, $zero, 0
lu32i.d $a0, -32768
xvreplgr2vr.d $xr0, $a0
@@ -511,8 +509,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr1, $sp, 96 # 32-byte Folded Reload
xvst $xr1, $sp, 128
- lu32i.d $s1, -1
- xvreplgr2vr.d $xr0, $s1
+ xvldi $xr0, -1552
xvfcmp.caf.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 160
addi.d $a0, $sp, 128
@@ -652,7 +649,6 @@ main: # @main
jirl $ra, $ra, 0
move $a0, $zero
addi.d $sp, $fp, -224
- ld.d $s1, $sp, 192 # 8-byte Folded Reload
ld.d $s0, $sp, 200 # 8-byte Folded Reload
ld.d $fp, $sp, 208 # 8-byte Folded Reload
ld.d $ra, $sp, 216 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_ceq_s.dir/lasx-xvfcmp_ceq_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_ceq_s.dir/lasx-xvfcmp_ceq_s.s
index da269061..7cda0700 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_ceq_s.dir/lasx-xvfcmp_ceq_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_ceq_s.dir/lasx-xvfcmp_ceq_s.s
@@ -941,9 +941,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr1, $sp, 160 # 32-byte Folded Reload
xvst $xr1, $sp, 192
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvfcmp.cueq.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cle_s.dir/lasx-xvfcmp_cle_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cle_s.dir/lasx-xvfcmp_cle_s.s
index 9eba8732..ddd06b91 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cle_s.dir/lasx-xvfcmp_cle_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cle_s.dir/lasx-xvfcmp_cle_s.s
@@ -961,11 +961,9 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 0
- pcalau12i $a1, %pc_hi20(.LCPI2_34)
- xvld $xr0, $a1, %pc_lo12(.LCPI2_34)
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_34)
+ xvld $xr0, $a0, %pc_lo12(.LCPI2_34)
+ xvldi $xr1, -1552
xvst $xr1, $sp, 224
xvfcmp.cule.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 256
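A second-order benefit shows up in files like lasx-xvfcmp_caf_s.s above: once the constant no longer has to live in a general-purpose register across calls, the register holding it ($s1 here) is freed, its st.d/ld.d spill pair disappears, and in the main-function epilogue hunk at the start of this section the frame shrinks from 544 to 496 bytes. The freed register also ripples into neighboring code as the $a-register renumbering visible in the pr37573 hunk earlier. Schematically (instruction pair taken from the xvexth-1 hunk above):

    # before: three GPR instructions plus a broadcast
    addi.w        $a0, $zero, -1
    lu32i.d       $a0, 0
    xvreplgr2vr.d $xr0, $a0
    # after: one instruction, no GPR touched
    xvldi         $xr0, -1777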
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_clt_s.dir/lasx-xvfcmp_clt_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_clt_s.dir/lasx-xvfcmp_clt_s.s
index ecd93ffc..def4296e 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_clt_s.dir/lasx-xvfcmp_clt_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_clt_s.dir/lasx-xvfcmp_clt_s.s
@@ -517,7 +517,6 @@ main: # @main
st.d $ra, $sp, 248 # 8-byte Folded Spill
st.d $fp, $sp, 240 # 8-byte Folded Spill
st.d $s0, $sp, 232 # 8-byte Folded Spill
- st.d $s1, $sp, 224 # 8-byte Folded Spill
addi.d $fp, $sp, 256
bstrins.d $sp, $zero, 4, 0
pcalau12i $a0, %pc_hi20(.LCPI2_0)
@@ -717,8 +716,8 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 128 # 32-byte Folded Reload
xvst $xr0, $sp, 160
- addi.w $s1, $zero, -1
- lu52i.d $a0, $s1, 2046
+ addi.w $a0, $zero, -1
+ lu52i.d $a0, $a0, 2046
xvreplgr2vr.d $xr0, $a0
xvfcmp.clt.d $xr0, $xr0, $xr0
xvst $xr0, $sp, 192
@@ -823,8 +822,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu32i.d $s1, 0
- xvreplgr2vr.d $xr0, $s1
+ xvldi $xr0, -1777
xvst $xr0, $sp, 160
xvfcmp.cult.s $xr0, $xr0, $xr0
xvst $xr0, $sp, 192
@@ -872,14 +870,11 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- xvld $xr0, $sp, 96 # 32-byte Folded Reload
- xvst $xr0, $sp, 160
pcalau12i $a0, %pc_hi20(.LCPI2_32)
xvld $xr0, $a0, %pc_lo12(.LCPI2_32)
- ori $s1, $zero, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
+ xvld $xr1, $sp, 96 # 32-byte Folded Reload
+ xvst $xr1, $sp, 160
+ xvldi $xr1, -1552
xvfcmp.cult.s $xr0, $xr1, $xr0
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
@@ -934,8 +929,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 128 # 32-byte Folded Reload
xvst $xr0, $sp, 160
- lu32i.d $s1, 65535
- xvreplgr2vr.d $xr0, $s1
+ xvldi $xr0, -1744
xvfcmp.cult.d $xr0, $xr0, $xr0
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
@@ -985,7 +979,6 @@ main: # @main
jirl $ra, $ra, 0
move $a0, $zero
addi.d $sp, $fp, -256
- ld.d $s1, $sp, 224 # 8-byte Folded Reload
ld.d $s0, $sp, 232 # 8-byte Folded Reload
ld.d $fp, $sp, 240 # 8-byte Folded Reload
ld.d $ra, $sp, 248 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cne_s.dir/lasx-xvfcmp_cne_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cne_s.dir/lasx-xvfcmp_cne_s.s
index 56e48b48..f3b4cc01 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cne_s.dir/lasx-xvfcmp_cne_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cne_s.dir/lasx-xvfcmp_cne_s.s
@@ -596,14 +596,9 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $s1, $zero, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1552
xvst $xr0, $sp, 192
- ori $a0, $zero, 0
- lu32i.d $a0, 65535
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1744
xvld $xr1, $sp, 160 # 32-byte Folded Reload
xvfcmp.cne.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 224
@@ -832,13 +827,9 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvst $xr0, $sp, 192
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1789
xvld $xr1, $sp, 160 # 32-byte Folded Reload
xvfcmp.cune.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 224
@@ -986,6 +977,7 @@ main: # @main
xvst $xr0, $sp, 192
pcalau12i $a0, %pc_hi20(.LCPI2_32)
xvld $xr0, $a0, %pc_lo12(.LCPI2_32)
+ ori $s1, $zero, 0
ori $a0, $zero, 0
lu32i.d $a0, -524288
lu52i.d $a0, $a0, -513
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cor_s.dir/lasx-xvfcmp_cor_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cor_s.dir/lasx-xvfcmp_cor_s.s
index 690e3cc7..c855d048 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cor_s.dir/lasx-xvfcmp_cor_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cor_s.dir/lasx-xvfcmp_cor_s.s
@@ -558,13 +558,11 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- xvld $xr0, $sp, 160 # 32-byte Folded Reload
- xvst $xr0, $sp, 192
pcalau12i $a0, %pc_hi20(.LCPI2_14)
xvld $xr0, $a0, %pc_lo12(.LCPI2_14)
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
+ xvld $xr1, $sp, 160 # 32-byte Folded Reload
+ xvst $xr1, $sp, 192
+ xvldi $xr1, -1552
xvfcmp.cor.d $xr0, $xr1, $xr0
xvst $xr0, $sp, 224
addi.d $a0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cun_s.dir/lasx-xvfcmp_cun_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cun_s.dir/lasx-xvfcmp_cun_s.s
index 05d81e15..90c69660 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cun_s.dir/lasx-xvfcmp_cun_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_cun_s.dir/lasx-xvfcmp_cun_s.s
@@ -476,8 +476,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 64 # 32-byte Folded Reload
xvst $xr0, $sp, 128
- lu12i.w $a0, -4096
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1544
xvld $xr1, $sp, 96 # 32-byte Folded Reload
xvfcmp.cun.d $xr0, $xr1, $xr0
xvst $xr0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_saf_s.dir/lasx-xvfcmp_saf_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_saf_s.dir/lasx-xvfcmp_saf_s.s
index 6d1d421e..60d1a56e 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_saf_s.dir/lasx-xvfcmp_saf_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_saf_s.dir/lasx-xvfcmp_saf_s.s
@@ -493,12 +493,11 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 128 # 32-byte Folded Reload
xvst $xr0, $sp, 160
- lu12i.w $a0, 256000
- xvreplgr2vr.w $xr0, $a0
lu12i.w $a0, 8224
ori $a0, $a0, 258
- xvreplgr2vr.w $xr1, $a0
- xvfcmp.saf.s $xr0, $xr1, $xr0
+ xvreplgr2vr.w $xr0, $a0
+ xvldi $xr1, -1456
+ xvfcmp.saf.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
addi.d $a1, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_seq_s.dir/lasx-xvfcmp_seq_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_seq_s.dir/lasx-xvfcmp_seq_s.s
index 5a5b21e8..77eba186 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_seq_s.dir/lasx-xvfcmp_seq_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_seq_s.dir/lasx-xvfcmp_seq_s.s
@@ -518,7 +518,6 @@ main: # @main
st.d $ra, $sp, 344 # 8-byte Folded Spill
st.d $fp, $sp, 336 # 8-byte Folded Spill
st.d $s0, $sp, 328 # 8-byte Folded Spill
- st.d $s1, $sp, 320 # 8-byte Folded Spill
addi.d $fp, $sp, 352
bstrins.d $sp, $zero, 4, 0
xvrepli.b $xr0, -1
@@ -538,14 +537,11 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- addi.w $s1, $zero, -1
- move $a0, $s1
- pcalau12i $a1, %pc_hi20(.LCPI2_0)
- xvld $xr0, $a1, %pc_lo12(.LCPI2_0)
- pcalau12i $a1, %pc_hi20(.LCPI2_1)
- xvld $xr1, $a1, %pc_lo12(.LCPI2_1)
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr2, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_0)
+ xvld $xr0, $a0, %pc_lo12(.LCPI2_0)
+ pcalau12i $a0, %pc_hi20(.LCPI2_1)
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
+ xvldi $xr2, -1777
xvst $xr2, $sp, 256
xvfcmp.seq.s $xr0, $xr1, $xr0
xvst $xr0, $sp, 288
@@ -957,13 +953,11 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 0
- pcalau12i $a1, %pc_hi20(.LCPI2_25)
- xvld $xr0, $a1, %pc_lo12(.LCPI2_25)
- pcalau12i $a1, %pc_hi20(.LCPI2_26)
- xvld $xr1, $a1, %pc_lo12(.LCPI2_26)
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr2, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_25)
+ xvld $xr0, $a0, %pc_lo12(.LCPI2_25)
+ pcalau12i $a0, %pc_hi20(.LCPI2_26)
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_26)
+ xvldi $xr2, -1552
xvst $xr2, $sp, 160 # 32-byte Folded Spill
xvst $xr2, $sp, 256
xvfcmp.sueq.s $xr0, $xr1, $xr0
@@ -977,7 +971,8 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 192 # 32-byte Folded Reload
xvst $xr0, $sp, 256
- lu52i.d $a0, $s1, 2046
+ addi.w $a0, $zero, -1
+ lu52i.d $a0, $a0, 2046
xvreplgr2vr.d $xr0, $a0
lu12i.w $a0, -144
ori $a0, $a0, 342
@@ -1181,7 +1176,6 @@ main: # @main
jirl $ra, $ra, 0
move $a0, $zero
addi.d $sp, $fp, -352
- ld.d $s1, $sp, 320 # 8-byte Folded Reload
ld.d $s0, $sp, 328 # 8-byte Folded Reload
ld.d $fp, $sp, 336 # 8-byte Folded Reload
ld.d $ra, $sp, 344 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sle_s.dir/lasx-xvfcmp_sle_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sle_s.dir/lasx-xvfcmp_sle_s.s
index ced3e248..089b079e 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sle_s.dir/lasx-xvfcmp_sle_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sle_s.dir/lasx-xvfcmp_sle_s.s
@@ -463,9 +463,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr1, $sp, 128 # 32-byte Folded Reload
xvst $xr1, $sp, 160
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvfcmp.sle.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
@@ -669,8 +667,7 @@ main: # @main
xvld $xr0, $a0, %pc_lo12(.LCPI2_16)
xvld $xr1, $sp, 64 # 32-byte Folded Reload
xvst $xr1, $sp, 160
- lu12i.w $a0, 4080
- xvreplgr2vr.d $xr1, $a0
+ xvldi $xr1, -1788
xvfcmp.sle.d $xr0, $xr1, $xr0
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_slt_s.dir/lasx-xvfcmp_slt_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_slt_s.dir/lasx-xvfcmp_slt_s.s
index 93262915..efe65c21 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_slt_s.dir/lasx-xvfcmp_slt_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_slt_s.dir/lasx-xvfcmp_slt_s.s
@@ -1281,9 +1281,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1552
xvst $xr0, $sp, 288
lu52i.d $a0, $zero, 2047
xvreplgr2vr.d $xr1, $a0
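Most of the remaining churn in these hunks is incidental: vector registers get renumbered (loads that targeted $xr0 now target $xr1 and vice versa) and commutative operand lists are swapped, with semantics unchanged. The one small regression is where $s1 used to cache -1 across the whole function: sites such as lu52i.d $a0, $s1, 2046 must now rebuild it locally with an extra addi.w $a0, $zero, -1, a cost the eliminated spill slot and reloads presumably outweigh.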
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sne_s.dir/lasx-xvfcmp_sne_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sne_s.dir/lasx-xvfcmp_sne_s.s
index 69881cfa..f86ec8ec 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sne_s.dir/lasx-xvfcmp_sne_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sne_s.dir/lasx-xvfcmp_sne_s.s
@@ -700,11 +700,9 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 0
- ori $a1, $zero, 0
- lu32i.d $a1, -1
- xvreplgr2vr.d $xr0, $a1
+ xvldi $xr0, -1552
xvst $xr0, $sp, 224
+ ori $a0, $zero, 0
lu32i.d $a0, 32768
lu52i.d $a0, $a0, -2048
xvreplgr2vr.d $xr0, $a0
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sor_s.dir/lasx-xvfcmp_sor_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sor_s.dir/lasx-xvfcmp_sor_s.s
index dff226eb..85e580fa 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sor_s.dir/lasx-xvfcmp_sor_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcmp_sor_s.dir/lasx-xvfcmp_sor_s.s
@@ -581,9 +581,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 96 # 32-byte Folded Reload
xvst $xr0, $sp, 128
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvld $xr1, $sp, 64 # 32-byte Folded Reload
xvfcmp.sor.d $xr0, $xr0, $xr1
xvst $xr0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvt.dir/lasx-xvfcvt.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvt.dir/lasx-xvfcvt.s
index 13855726..08d4522a 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvt.dir/lasx-xvfcvt.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvt.dir/lasx-xvfcvt.s
@@ -460,9 +460,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 7
- ori $a0, $a0, 3072
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2692
xvst $xr0, $sp, 224
xvrepli.b $xr0, 109
xvfcvt.h.s $xr0, $xr0, $xr0
@@ -615,9 +613,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr1, $sp, 128 # 32-byte Folded Reload
xvst $xr1, $sp, 160
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvfcvt.s.d $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvth.dir/lasx-xvfcvth.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvth.dir/lasx-xvfcvth.s
index c2198114..4bd69332 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvth.dir/lasx-xvfcvth.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfcvth.dir/lasx-xvfcvth.s
@@ -582,8 +582,7 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_11)
xvld $xr0, $a0, %pc_lo12(.LCPI2_11)
- lu12i.w $a0, -524288
- xvreplgr2vr.w $xr1, $a0
+ xvldi $xr1, -3200
xvst $xr1, $sp, 128
xvfcvth.s.h $xr0, $xr0
xvst $xr0, $sp, 224
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-1.dir/lasx-xvffint-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-1.dir/lasx-xvffint-1.s
index c9b8ec91..0ff24da3 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-1.dir/lasx-xvffint-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-1.dir/lasx-xvffint-1.s
@@ -488,8 +488,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu52i.d $a0, $zero, -1025
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -784
xvst $xr0, $sp, 160
xvrepli.b $xr0, -1
xvst $xr0, $sp, 32 # 32-byte Folded Spill
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-2.dir/lasx-xvffint-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-2.dir/lasx-xvffint-2.s
index d89e5988..9e7e9aed 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-2.dir/lasx-xvffint-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffint-2.dir/lasx-xvffint-2.s
@@ -388,8 +388,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 266240
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1248
xvst $xr0, $sp, 96
xvrepli.d $xr0, 8
xvffint.s.wu $xr0, $xr0
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffinth.dir/lasx-xvffinth.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffinth.dir/lasx-xvffinth.s
index 2c29bf93..fe8c98e1 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffinth.dir/lasx-xvffinth.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvffinth.dir/lasx-xvffinth.s
@@ -643,8 +643,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu52i.d $a0, $zero, -1025
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -784
xvst $xr0, $sp, 96
xvrepli.b $xr0, -1
xvffinth.d.w $xr0, $xr0
@@ -684,10 +683,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -458752
- lu52i.d $a0, $a0, -1021
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -839
xvst $xr0, $sp, 96
xvrepli.w $xr0, -25
xvffinth.d.w $xr0, $xr0
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmadd_d.dir/lasx-xvfmadd_d.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmadd_d.dir/lasx-xvfmadd_d.s
index 0aad545c..3759f81d 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmadd_d.dir/lasx-xvfmadd_d.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmadd_d.dir/lasx-xvfmadd_d.s
@@ -505,8 +505,7 @@ main: # @main
ori $a0, $a0, 3932
xvreplgr2vr.d $xr0, $a0
xvst $xr0, $sp, 96
- ori $a0, $zero, 4095
- xvreplgr2vr.w $xr1, $a0
+ xvldi $xr1, -2545
xvfmsub.d $xr0, $xr1, $xr1, $xr0
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
@@ -570,9 +569,7 @@ main: # @main
xvst $xr2, $sp, 96
lu52i.d $a0, $zero, 2047
xvreplgr2vr.d $xr0, $a0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr1, $a0
+ xvldi $xr1, -1777
xvfmsub.d $xr0, $xr1, $xr2, $xr0
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmaxa_s.dir/lasx-xvfmaxa_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmaxa_s.dir/lasx-xvfmaxa_s.s
index a20bc86f..4b8f92da 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmaxa_s.dir/lasx-xvfmaxa_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfmaxa_s.dir/lasx-xvfmaxa_s.s
@@ -601,8 +601,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr1, $sp, 64 # 32-byte Folded Reload
xvst $xr1, $sp, 96
- lu12i.w $a0, 256
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3568
xvfmina.s $xr0, $xr0, $xr1
xvst $xr0, $sp, 128
addi.d $a0, $sp, 96
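Several of the replaced constants around here are IEEE bit patterns, i.e. the compiler is using vldi's floating-point immediate mode, which expands an 8-bit encoding to a full float or double, similar in spirit to AArch64's FMOV immediate. A hedged sketch of the correspondence (immediates and bit patterns read directly off the surrounding hunks; function names are illustrative):

    #include <lasxintrin.h>

    __m256d minus_one_d(void) { /* 0xBFF0000000000000 = -1.0 per lane */
        return (__m256d)__lasx_xvldi(-784);
    }
    __m256d one_d(void) {       /* 0x3FF0000000000000 = 1.0 per lane */
        return (__m256d)__lasx_xvldi(-912);
    }
    __m256 one_s(void) {        /* 0x3F800000 = 1.0f per lane */
        return (__m256)__lasx_xvldi(-1424);
    }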
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfnmadd_s.dir/lasx-xvfnmadd_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfnmadd_s.dir/lasx-xvfnmadd_s.s
index 50ca32d0..18339167 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfnmadd_s.dir/lasx-xvfnmadd_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfnmadd_s.dir/lasx-xvfnmadd_s.s
@@ -580,7 +580,6 @@ main: # @main
st.d $ra, $sp, 216 # 8-byte Folded Spill
st.d $fp, $sp, 208 # 8-byte Folded Spill
st.d $s0, $sp, 200 # 8-byte Folded Spill
- st.d $s1, $sp, 192 # 8-byte Folded Spill
addi.d $fp, $sp, 224
bstrins.d $sp, $zero, 4, 0
lu12i.w $a0, -11
@@ -688,8 +687,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $s1, -524288
- xvreplgr2vr.w $xr0, $s1
+ xvldi $xr0, -3200
xvst $xr0, $sp, 64 # 32-byte Folded Spill
xvst $xr0, $sp, 128
xvld $xr0, $sp, 96 # 32-byte Folded Reload
@@ -753,7 +751,8 @@ main: # @main
lu12i.w $a0, 15
ori $a0, $a0, 3330
xvreplgr2vr.h $xr0, $a0
- lu52i.d $a0, $s1, 2047
+ lu12i.w $a0, -524288
+ lu52i.d $a0, $a0, 2047
xvreplgr2vr.d $xr1, $a0
xvfnmadd.s $xr0, $xr1, $xr0, $xr0
xvst $xr0, $sp, 160
@@ -970,7 +969,6 @@ main: # @main
jirl $ra, $ra, 0
move $a0, $zero
addi.d $sp, $fp, -224
- ld.d $s1, $sp, 192 # 8-byte Folded Reload
ld.d $s0, $sp, 200 # 8-byte Folded Reload
ld.d $fp, $sp, 208 # 8-byte Folded Reload
ld.d $ra, $sp, 216 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_d.dir/lasx-xvfrint_d.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_d.dir/lasx-xvfrint_d.s
index 8f891b7c..ca51a194 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_d.dir/lasx-xvfrint_d.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_d.dir/lasx-xvfrint_d.s
@@ -736,8 +736,7 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_16)
xvld $xr0, $a0, %pc_lo12(.LCPI2_16)
- lu52i.d $a0, $zero, 1023
- xvreplgr2vr.d $xr1, $a0
+ xvldi $xr1, -912
xvst $xr1, $sp, 192
xvfrintrp.d $xr0, $xr0
xvst $xr0, $sp, 224
@@ -885,9 +884,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, 7
- ori $a0, $a0, 3072
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2692
xvst $xr0, $sp, 192
xvfrintrm.d $xr0, $xr0
xvst $xr0, $sp, 224
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_s.dir/lasx-xvfrint_s.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_s.dir/lasx-xvfrint_s.s
index 53ff92c6..8ceeaa6a 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_s.dir/lasx-xvfrint_s.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrint_s.dir/lasx-xvfrint_s.s
@@ -737,9 +737,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
xvst $xr0, $sp, 224
xvfrintrp.s $xr0, $xr0
xvst $xr0, $sp, 256
@@ -763,8 +761,7 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_11)
xvld $xr0, $a0, %pc_lo12(.LCPI2_11)
- lu12i.w $a0, 260096
- xvreplgr2vr.w $xr1, $a0
+ xvldi $xr1, -1424
xvst $xr1, $sp, 224
xvfrintrp.s $xr0, $xr0
xvst $xr0, $sp, 256
@@ -1059,8 +1056,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 128 # 32-byte Folded Reload
xvst $xr0, $sp, 160
- lu12i.w $a0, 4080
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1788
xvfrint.s $xr0, $xr0
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrstp.dir/lasx-xvfrstp.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrstp.dir/lasx-xvfrstp.s
index 3977b6f6..51a358b8 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrstp.dir/lasx-xvfrstp.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvfrstp.dir/lasx-xvfrstp.s
@@ -861,9 +861,7 @@ main: # @main
pcalau12i $a0, %pc_hi20(.LCPI2_22)
xvld $xr0, $a0, %pc_lo12(.LCPI2_22)
xvst $xr0, $sp, 160
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
xvld $xr1, $sp, 128 # 32-byte Folded Reload
xvfrstp.h $xr0, $xr1, $xr1
xvst $xr0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvftint-2.dir/lasx-xvftint-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvftint-2.dir/lasx-xvftint-2.s
index 431197ed..8ab8bc93 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvftint-2.dir/lasx-xvftint-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvftint-2.dir/lasx-xvftint-2.s
@@ -757,8 +757,7 @@ main: # @main
move $a3, $s0
pcaddu18i $ra, %call36(check_lasx_out)
jirl $ra, $ra, 0
- lu12i.w $a0, -524288
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3200
xvst $xr0, $sp, 96 # 32-byte Folded Spill
xvst $xr0, $sp, 256
lu12i.w $a0, 15
@@ -944,8 +943,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 224 # 32-byte Folded Reload
xvst $xr0, $sp, 256
- lu12i.w $a0, 8
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2688
xvftintrp.w.s $xr0, $xr0
xvst $xr0, $sp, 288
addi.d $a0, $sp, 256
@@ -1853,8 +1851,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr1, $sp, 224 # 32-byte Folded Reload
xvst $xr1, $sp, 256
- lu12i.w $a0, 2048
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3456
xvftintrm.w.d $xr0, $xr1, $xr0
xvst $xr0, $sp, 288
addi.d $a0, $sp, 256
@@ -1869,7 +1866,6 @@ main: # @main
lu12i.w $a0, 128
lu52i.d $a0, $a0, 506
xvreplgr2vr.d $xr0, $a0
- ori $s1, $zero, 0
ori $a0, $zero, 0
lu32i.d $a0, -524288
lu52i.d $a0, $a0, 2047
@@ -1977,8 +1973,7 @@ main: # @main
jirl $ra, $ra, 0
xvld $xr0, $sp, 224 # 32-byte Folded Reload
xvst $xr0, $sp, 256
- lu32i.d $s1, -1
- xvreplgr2vr.d $xr0, $s1
+ xvldi $xr0, -1552
xvrepli.d $xr1, -8
xvftint.w.d $xr0, $xr1, $xr0
xvst $xr0, $sp, 288
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhaddw-2.dir/lasx-xvhaddw-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhaddw-2.dir/lasx-xvhaddw-2.s
index 74c2104d..a98418a4 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhaddw-2.dir/lasx-xvhaddw-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhaddw-2.dir/lasx-xvhaddw-2.s
@@ -1454,13 +1454,11 @@ main: # @main
jirl $ra, $ra, 0
pcalau12i $a0, %pc_hi20(.LCPI2_47)
xvld $xr0, $a0, %pc_lo12(.LCPI2_47)
- xvst $xr0, $sp, 160
pcalau12i $a0, %pc_hi20(.LCPI2_48)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_48)
- lu12i.w $a0, 3
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr1, $a0
- xvhaddw.du.wu $xr0, $xr1, $xr0
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_48)
+ xvst $xr0, $sp, 160
+ xvldi $xr0, -2497
+ xvhaddw.du.wu $xr0, $xr0, $xr1
xvst $xr0, $sp, 192
addi.d $a0, $sp, 160
addi.d $a1, $sp, 192
a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-1.dir/lasx-xvhsubw-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-1.dir/lasx-xvhsubw-1.s index 2909895c..8ee60f1c 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-1.dir/lasx-xvhsubw-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-1.dir/lasx-xvhsubw-1.s @@ -772,7 +772,6 @@ main: # @main st.d $ra, $sp, 248 # 8-byte Folded Spill st.d $fp, $sp, 240 # 8-byte Folded Spill st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill addi.d $fp, $sp, 256 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -864,8 +863,8 @@ main: # @main jirl $ra, $ra, 0 xvrepli.d $xr0, 18 xvst $xr0, $sp, 160 - lu12i.w $s1, 7 - ori $a0, $s1, 4078 + lu12i.w $a0, 7 + ori $a0, $a0, 4078 xvreplgr2vr.d $xr0, $a0 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvhsubw.h.b $xr0, $xr1, $xr0 @@ -877,9 +876,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -1789 xvst $xr1, $sp, 32 # 32-byte Folded Spill xvst $xr1, $sp, 160 xvld $xr0, $sp, 128 # 32-byte Folded Reload @@ -907,11 +904,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - ori $a1, $zero, 0 - lu32i.d $a1, 65535 - xvreplgr2vr.d $xr0, $a1 + xvldi $xr0, -1744 xvst $xr0, $sp, 160 + ori $a0, $zero, 0 lu32i.d $a0, 1 xvreplgr2vr.d $xr0, $a0 xvld $xr1, $sp, 128 # 32-byte Folded Reload @@ -1143,8 +1138,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_31) xvld $xr1, $a0, %pc_lo12(.LCPI2_31) xvst $xr0, $sp, 160 - ori $a0, $s1, 3072 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2692 xvhsubw.d.w $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 @@ -1216,8 +1210,7 @@ main: # @main jirl $ra, $ra, 0 xvrepli.d $xr0, -512 xvst $xr0, $sp, 160 - ori $a0, $zero, 512 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3838 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvhsubw.d.w $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 @@ -1339,7 +1332,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -256 - ld.d $s1, $sp, 224 # 8-byte Folded Reload ld.d $s0, $sp, 232 # 8-byte Folded Reload ld.d $fp, $sp, 240 # 8-byte Folded Reload ld.d $ra, $sp, 248 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-2.dir/lasx-xvhsubw-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-2.dir/lasx-xvhsubw-2.s index 0f221378..c915e474 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-2.dir/lasx-xvhsubw-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvhsubw-2.dir/lasx-xvhsubw-2.s @@ -894,7 +894,6 @@ main: # @main st.d $ra, $sp, 248 # 8-byte Folded Spill st.d $fp, $sp, 240 # 8-byte Folded Spill st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill addi.d $fp, $sp, 256 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -1007,9 +1006,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2305 xvst $xr0, $sp, 160 xvrepli.b $xr0, -1 xvld $xr1, $sp, 128 # 32-byte Folded Reload @@ -1057,8 +1054,8 @@ main: # @main xvld $xr1, $a0, %pc_lo12(.LCPI2_20) xvst $xr1, $sp, 96 # 32-byte Folded Spill 
lu12i.w $a0, 3 - ori $s1, $a0, 4095 - xvreplgr2vr.d $xr0, $s1 + ori $a0, $a0, 4095 + xvreplgr2vr.d $xr0, $a0 xvhsubw.wu.hu $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 @@ -1379,7 +1376,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_52) xvld $xr0, $a0, %pc_lo12(.LCPI2_52) xvst $xr0, $sp, 160 - xvreplgr2vr.w $xr0, $s1 + xvldi $xr0, -2497 xvrepli.h $xr1, -2 xvhsubw.qu.du $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 @@ -1408,7 +1405,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -256 - ld.d $s1, $sp, 224 # 8-byte Folded Reload ld.d $s0, $sp, 232 # 8-byte Folded Reload ld.d $fp, $sp, 240 # 8-byte Folded Reload ld.d $ra, $sp, 248 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvh.dir/lasx-xvilvh.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvh.dir/lasx-xvilvh.s index aa8c804c..f8aba32d 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvh.dir/lasx-xvilvh.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvh.dir/lasx-xvilvh.s @@ -902,8 +902,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_12) xvld $xr1, $a0, %pc_lo12(.LCPI2_12) xvst $xr0, $sp, 160 - lu52i.d $a0, $zero, -1025 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -784 xvilvh.h $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 @@ -947,9 +946,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_18) xvld $xr0, $a0, %pc_lo12(.LCPI2_18) xvst $xr0, $sp, 160 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1777 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvilvh.h $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 @@ -1055,11 +1052,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1789 xvst $xr0, $sp, 160 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2305 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvilvh.w $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 @@ -1201,8 +1196,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_42) xvld $xr0, $a0, %pc_lo12(.LCPI2_42) xvst $xr0, $sp, 160 - lu12i.w $a0, -524288 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3200 xvld $xr1, $sp, 32 # 32-byte Folded Reload xvilvh.d $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvl.dir/lasx-xvilvl.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvl.dir/lasx-xvilvl.s index 93c72b16..69b52535 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvl.dir/lasx-xvilvl.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvilvl.dir/lasx-xvilvl.s @@ -1001,14 +1001,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_12) xvld $xr0, $a0, %pc_lo12(.LCPI2_12) - xvst $xr0, $sp, 192 pcalau12i $a0, %pc_hi20(.LCPI2_13) - xvld $xr0, $a0, %pc_lo12(.LCPI2_13) - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr1, $a0 - xvst $xr1, $sp, 96 # 32-byte Folded Spill - xvilvl.b $xr0, $xr1, $xr0 + xvld $xr1, $a0, %pc_lo12(.LCPI2_13) + xvst $xr0, $sp, 192 + xvldi $xr0, -1777 + xvst $xr0, $sp, 96 # 32-byte Folded Spill + xvilvl.b $xr0, $xr0, $xr1 xvst $xr0, $sp, 224 addi.d $a0, $sp, 192 addi.d $a1, $sp, 224 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvldi.dir/lasx-xvldi.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvldi.dir/lasx-xvldi.s index 
8b929d12..09b72e6a 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvldi.dir/lasx-xvldi.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvldi.dir/lasx-xvldi.s @@ -231,129 +231,119 @@ check_lasx_fp_out: # @check_lasx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -320 - st.d $ra, $sp, 312 # 8-byte Folded Spill - st.d $fp, $sp, 304 # 8-byte Folded Spill - st.d $s0, $sp, 296 # 8-byte Folded Spill - addi.d $fp, $sp, 320 + addi.d $sp, $sp, -256 + st.d $ra, $sp, 248 # 8-byte Folded Spill + st.d $fp, $sp, 240 # 8-byte Folded Spill + st.d $s0, $sp, 232 # 8-byte Folded Spill + addi.d $fp, $sp, 256 bstrins.d $sp, $zero, 4, 0 xvrepli.w $xr0, 16 - xvst $xr0, $sp, 192 # 32-byte Folded Spill - xvst $xr0, $sp, 224 + xvst $xr0, $sp, 128 # 32-byte Folded Spill + xvst $xr0, $sp, 160 xvldi $xr0, -4080 - xvst $xr0, $sp, 160 # 32-byte Folded Spill - xvst $xr0, $sp, 256 + xvst $xr0, $sp, 96 # 32-byte Folded Spill + xvst $xr0, $sp, 192 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $s0, $a0, %pc_lo12(.L.str.5) - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 22 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 xvrepli.h $xr0, -324 - xvst $xr0, $sp, 224 + xvst $xr0, $sp, 160 xvldi $xr0, 1724 - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 27 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - lu32i.d $a0, 65536 - lu52i.d $a0, $a0, 1021 - xvreplgr2vr.d $xr0, $a0 - xvst $xr0, $sp, 128 # 32-byte Folded Spill - xvst $xr0, $sp, 224 xvldi $xr0, -943 - xvst $xr0, $sp, 96 # 32-byte Folded Spill - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvst $xr0, $sp, 64 # 32-byte Folded Spill + xvst $xr0, $sp, 160 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 32 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 xvrepli.h $xr0, -228 - xvst $xr0, $sp, 224 + xvst $xr0, $sp, 160 xvldi $xr0, 1820 - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 37 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 466944 - xvreplgr2vr.w $xr0, $a0 - xvst $xr0, $sp, 64 # 32-byte Folded Spill - xvst $xr0, $sp, 224 xvldi $xr0, -3214 xvst $xr0, $sp, 32 # 32-byte Folded Spill - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvst $xr0, $sp, 160 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 42 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 xvrepli.w $xr0, -227 - xvst $xr0, $sp, 224 + xvst $xr0, $sp, 160 xvldi $xr0, 2845 - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 47 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - xvld $xr0, $sp, 192 # 32-byte Folded Reload - xvst $xr0, $sp, 224 - xvld $xr0, $sp, 160 # 32-byte Folded Reload - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvld $xr0, $sp, 128 # 32-byte Folded Reload + xvst $xr0, $sp, 160 + xvld $xr0, $sp, 96 # 32-byte Folded Reload + xvst $xr0, $sp, 192 + 
addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 52 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - xvld $xr0, $sp, 128 # 32-byte Folded Reload - xvst $xr0, $sp, 224 - xvld $xr0, $sp, 96 # 32-byte Folded Reload - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvld $xr0, $sp, 64 # 32-byte Folded Reload + xvst $xr0, $sp, 160 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 57 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - xvld $xr0, $sp, 64 # 32-byte Folded Reload - xvst $xr0, $sp, 224 xvld $xr0, $sp, 32 # 32-byte Folded Reload - xvst $xr0, $sp, 256 - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 256 + xvst $xr0, $sp, 160 + xvst $xr0, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 192 ori $a2, $zero, 32 ori $a4, $zero, 62 move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 move $a0, $zero - addi.d $sp, $fp, -320 - ld.d $s0, $sp, 296 # 8-byte Folded Reload - ld.d $fp, $sp, 304 # 8-byte Folded Reload - ld.d $ra, $sp, 312 # 8-byte Folded Reload - addi.d $sp, $sp, 320 + addi.d $sp, $fp, -256 + ld.d $s0, $sp, 232 # 8-byte Folded Reload + ld.d $fp, $sp, 240 # 8-byte Folded Reload + ld.d $ra, $sp, 248 # 8-byte Folded Reload + addi.d $sp, $sp, 256 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmadd.dir/lasx-xvmadd.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmadd.dir/lasx-xvmadd.s index faf9f1ec..cf3781b0 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmadd.dir/lasx-xvmadd.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmadd.dir/lasx-xvmadd.s @@ -525,9 +525,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 7 - ori $a0, $a0, 3072 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2692 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 @@ -706,8 +704,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -524288 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3200 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-1.dir/lasx-xvmaddwev-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-1.dir/lasx-xvmaddwev-1.s index 3c3eaa4c..63f17bce 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-1.dir/lasx-xvmaddwev-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-1.dir/lasx-xvmaddwev-1.s @@ -1297,15 +1297,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_49) xvld $xr0, $a0, %pc_lo12(.LCPI2_49) - xvst $xr0, $sp, 192 pcalau12i $a0, %pc_hi20(.LCPI2_50) - xvld $xr0, $a0, %pc_lo12(.LCPI2_50) - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr1, $a0 - xvst $xr1, $sp, 32 # 32-byte Folded Spill - xvmaddwev.q.d $xr0, $xr1, $xr0 - xvst $xr0, $sp, 224 + xvld $xr1, $a0, %pc_lo12(.LCPI2_50) + xvst $xr0, $sp, 192 + xvldi $xr0, -1777 + xvst $xr0, $sp, 32 # 32-byte Folded Spill + xvmaddwev.q.d $xr1, $xr0, $xr1 + xvst $xr1, $sp, 224 addi.d $a0, $sp, 192 addi.d $a1, $sp, 224 ori $a2, $zero, 32 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-3.dir/lasx-xvmaddwev-3.s 
b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-3.dir/lasx-xvmaddwev-3.s index 720dbbcd..cf61dbad 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-3.dir/lasx-xvmaddwev-3.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwev-3.dir/lasx-xvmaddwev-3.s @@ -1139,7 +1139,6 @@ main: # @main st.d $ra, $sp, 280 # 8-byte Folded Spill st.d $fp, $sp, 272 # 8-byte Folded Spill st.d $s0, $sp, 264 # 8-byte Folded Spill - st.d $s1, $sp, 256 # 8-byte Folded Spill addi.d $fp, $sp, 288 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -1491,7 +1490,6 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $s1, $zero, 0 ori $a0, $zero, 0 lu32i.d $a0, 1 xvreplgr2vr.d $xr0, $a0 @@ -1640,8 +1638,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_57) xvld $xr1, $a0, %pc_lo12(.LCPI2_57) xvst $xr0, $sp, 192 - lu32i.d $s1, -1 - xvreplgr2vr.d $xr0, $s1 + xvldi $xr0, -1552 xvld $xr2, $sp, 128 # 32-byte Folded Reload xvmaddwev.d.wu.w $xr0, $xr2, $xr1 xvst $xr0, $sp, 224 @@ -1825,7 +1822,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -288 - ld.d $s1, $sp, 256 # 8-byte Folded Reload ld.d $s0, $sp, 264 # 8-byte Folded Reload ld.d $fp, $sp, 272 # 8-byte Folded Reload ld.d $ra, $sp, 280 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-1.dir/lasx-xvmaddwod-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-1.dir/lasx-xvmaddwod-1.s index 8cc39379..146cd398 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-1.dir/lasx-xvmaddwod-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-1.dir/lasx-xvmaddwod-1.s @@ -846,7 +846,6 @@ main: # @main st.d $ra, $sp, 280 # 8-byte Folded Spill st.d $fp, $sp, 272 # 8-byte Folded Spill st.d $s0, $sp, 264 # 8-byte Folded Spill - st.d $s1, $sp, 256 # 8-byte Folded Spill addi.d $fp, $sp, 288 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -896,11 +895,8 @@ main: # @main lu12i.w $a0, 524279 ori $a0, $a0, 4095 xvreplgr2vr.d $xr0, $a0 - addi.w $s1, $zero, -1 xvst $xr0, $sp, 192 - move $a0, $s1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -1777 xvld $xr2, $sp, 160 # 32-byte Folded Reload xvmaddwod.h.b $xr0, $xr1, $xr2 xvst $xr0, $sp, 224 @@ -1015,14 +1011,13 @@ main: # @main lu12i.w $a0, 16 ori $a0, $a0, 513 lu32i.d $a0, 512 - xvreplgr2vr.d $xr0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_11) - xvld $xr1, $a0, %pc_lo12(.LCPI2_11) - xvst $xr0, $sp, 192 - ori $a0, $zero, 512 - xvreplgr2vr.w $xr0, $a0 - xvmaddwod.h.b $xr0, $xr1, $xr1 - xvst $xr0, $sp, 224 + pcalau12i $a1, %pc_hi20(.LCPI2_11) + xvld $xr0, $a1, %pc_lo12(.LCPI2_11) + xvreplgr2vr.d $xr1, $a0 + xvst $xr1, $sp, 192 + xvldi $xr1, -3838 + xvmaddwod.h.b $xr1, $xr0, $xr0 + xvst $xr1, $sp, 224 addi.d $a0, $sp, 192 addi.d $a1, $sp, 224 ori $a2, $zero, 32 @@ -1220,9 +1215,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvst $xr1, $sp, 192 - ori $a0, $zero, 0 - lu32i.d $a0, 65535 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1744 xvld $xr2, $sp, 160 # 32-byte Folded Reload xvmaddwod.d.w $xr1, $xr0, $xr2 xvst $xr1, $sp, 224 @@ -1347,13 +1340,14 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_45) xvld $xr0, $a0, %pc_lo12(.LCPI2_45) - pcalau12i $a0, %pc_hi20(.LCPI2_46) - xvld $xr1, $a0, %pc_lo12(.LCPI2_46) xvst $xr0, $sp, 
192 - lu52i.d $a0, $s1, 2047 - xvreplgr2vr.d $xr0, $a0 - xvmaddwod.q.du.d $xr0, $xr1, $xr0 - xvst $xr0, $sp, 224 + pcalau12i $a0, %pc_hi20(.LCPI2_46) + xvld $xr0, $a0, %pc_lo12(.LCPI2_46) + addi.w $a0, $zero, -1 + lu52i.d $a0, $a0, 2047 + xvreplgr2vr.d $xr1, $a0 + xvmaddwod.q.du.d $xr1, $xr0, $xr1 + xvst $xr1, $sp, 224 addi.d $a0, $sp, 192 addi.d $a1, $sp, 224 ori $a2, $zero, 32 @@ -1710,7 +1704,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -288 - ld.d $s1, $sp, 256 # 8-byte Folded Reload ld.d $s0, $sp, 264 # 8-byte Folded Reload ld.d $fp, $sp, 272 # 8-byte Folded Reload ld.d $ra, $sp, 280 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-2.dir/lasx-xvmaddwod-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-2.dir/lasx-xvmaddwod-2.s index 7036c440..443f5bdb 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-2.dir/lasx-xvmaddwod-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-2.dir/lasx-xvmaddwod-2.s @@ -680,7 +680,6 @@ main: # @main st.d $ra, $sp, 248 # 8-byte Folded Spill st.d $fp, $sp, 240 # 8-byte Folded Spill st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill addi.d $fp, $sp, 256 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -802,10 +801,7 @@ main: # @main xvreplgr2vr.d $xr0, $a1 xvst $xr0, $sp, 160 xvreplgr2vr.d $xr0, $a0 - ori $a0, $zero, 0 - lu32i.d $a0, -458752 - lu52i.d $a0, $a0, -1021 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -839 xvmaddwod.h.bu $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 @@ -898,8 +894,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - addi.w $s1, $zero, -1 - move $a0, $s1 + addi.w $a0, $zero, -1 lu32i.d $a0, 255 lu52i.d $a0, $a0, -2 xvreplgr2vr.d $xr0, $a0 @@ -1015,8 +1010,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_23) xvld $xr0, $a0, %pc_lo12(.LCPI2_23) xvst $xr0, $sp, 160 - lu32i.d $s1, 0 - xvreplgr2vr.d $xr1, $s1 + xvldi $xr1, -1777 xvld $xr2, $sp, 128 # 32-byte Folded Reload xvmaddwod.d.wu $xr0, $xr2, $xr1 xvst $xr0, $sp, 192 @@ -1142,7 +1136,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -256 - ld.d $s1, $sp, 224 # 8-byte Folded Reload ld.d $s0, $sp, 232 # 8-byte Folded Reload ld.d $fp, $sp, 240 # 8-byte Folded Reload ld.d $ra, $sp, 248 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-3.dir/lasx-xvmaddwod-3.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-3.dir/lasx-xvmaddwod-3.s index a4d1c30c..21ed1c08 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-3.dir/lasx-xvmaddwod-3.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmaddwod-3.dir/lasx-xvmaddwod-3.s @@ -969,8 +969,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu52i.d $a0, $zero, -1025 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -784 xvst $xr0, $sp, 128 xvld $xr1, $sp, 96 # 32-byte Folded Reload xvmaddwod.w.hu.h $xr0, $xr1, $xr1 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmax-1.dir/lasx-xvmax-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmax-1.dir/lasx-xvmax-1.s index 89bc1749..b848ddfc 100644 --- 
a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmax-1.dir/lasx-xvmax-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmax-1.dir/lasx-xvmax-1.s @@ -335,8 +335,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -4096 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1544 xvst $xr0, $sp, 96 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 @@ -475,8 +474,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1024 xvst $xr0, $sp, 96 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-1.dir/lasx-xvmod-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-1.dir/lasx-xvmod-1.s index 3f239d56..cb27b38d 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-1.dir/lasx-xvmod-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-1.dir/lasx-xvmod-1.s @@ -283,8 +283,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu52i.d $a0, $zero, -16 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1664 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-2.dir/lasx-xvmod-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-2.dir/lasx-xvmod-2.s index c7effd55..a94e1326 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-2.dir/lasx-xvmod-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmod-2.dir/lasx-xvmod-2.s @@ -465,8 +465,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 4080 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1788 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmsub.dir/lasx-xvmsub.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmsub.dir/lasx-xvmsub.s index f82c8a8e..2efedb55 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmsub.dir/lasx-xvmsub.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmsub.dir/lasx-xvmsub.s @@ -380,9 +380,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1789 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 @@ -392,8 +390,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1024 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmuh-1.dir/lasx-xvmuh-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmuh-1.dir/lasx-xvmuh-1.s index eac53850..e389cd8f 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmuh-1.dir/lasx-xvmuh-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmuh-1.dir/lasx-xvmuh-1.s @@ -1159,9 +1159,7 @@ main: # @main lu32i.d $a0, 0 xvreplgr2vr.d $xr0, $a0 xvst $xr0, $sp, 160 - ori $a0, $zero, 0 - lu32i.d $a0, 65535 - 
xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1744 xvmuh.d $xr0, $xr0, $xr0 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-2.dir/lasx-xvmulwev-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-2.dir/lasx-xvmulwev-2.s index 219c59a0..4a44e5ad 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-2.dir/lasx-xvmulwev-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-2.dir/lasx-xvmulwev-2.s @@ -1461,8 +1461,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr1, $sp, 224 # 32-byte Folded Reload xvst $xr1, $sp, 256 - ori $a0, $zero, 512 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3838 xvmulwev.w.hu $xr0, $xr1, $xr0 xvst $xr0, $sp, 288 addi.d $a0, $sp, 256 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-3.dir/lasx-xvmulwev-3.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-3.dir/lasx-xvmulwev-3.s index 03563314..4652a8be 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-3.dir/lasx-xvmulwev-3.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwev-3.dir/lasx-xvmulwev-3.s @@ -1061,8 +1061,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr1, $sp, 160 # 32-byte Folded Reload xvst $xr1, $sp, 192 - lu12i.w $a0, 2048 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3456 xvmulwev.d.wu.w $xr0, $xr1, $xr0 xvst $xr0, $sp, 224 addi.d $a0, $sp, 192 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-1.dir/lasx-xvmulwod-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-1.dir/lasx-xvmulwod-1.s index 8a81223a..75f77fb2 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-1.dir/lasx-xvmulwod-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-1.dir/lasx-xvmulwod-1.s @@ -1313,8 +1313,7 @@ main: # @main lu32i.d $a0, 256 xvreplgr2vr.d $xr0, $a0 xvst $xr0, $sp, 224 - lu12i.w $a0, 256 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3568 xvmulwod.d.w $xr0, $xr0, $xr0 xvst $xr0, $sp, 256 addi.d $a0, $sp, 224 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-2.dir/lasx-xvmulwod-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-2.dir/lasx-xvmulwod-2.s index d1d8e63f..dae744ad 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-2.dir/lasx-xvmulwod-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-2.dir/lasx-xvmulwod-2.s @@ -928,13 +928,9 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 96 # 32-byte Folded Reload xvst $xr0, $sp, 128 - lu12i.w $a0, 4 - xvreplgr2vr.w $xr0, $a0 - ori $a0, $zero, 0 - lu32i.d $a0, -458752 - lu52i.d $a0, $a0, -1021 - xvreplgr2vr.d $xr1, $a0 - xvmulwod.h.bu $xr0, $xr0, $xr1 + xvldi $xr0, -839 + xvldi $xr1, -3776 + xvmulwod.h.bu $xr0, $xr1, $xr0 xvst $xr0, $sp, 160 addi.d $a0, $sp, 128 addi.d $a1, $sp, 160 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-3.dir/lasx-xvmulwod-3.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-3.dir/lasx-xvmulwod-3.s index 596518a6..876f53ab 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-3.dir/lasx-xvmulwod-3.s +++ 
b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvmulwod-3.dir/lasx-xvmulwod-3.s @@ -627,7 +627,6 @@ main: # @main st.d $ra, $sp, 216 # 8-byte Folded Spill st.d $fp, $sp, 208 # 8-byte Folded Spill st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill addi.d $fp, $sp, 224 bstrins.d $sp, $zero, 4, 0 xvrepli.b $xr1, 0 @@ -673,12 +672,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - addi.w $s1, $zero, -1 xvld $xr1, $sp, 96 # 32-byte Folded Reload xvst $xr1, $sp, 128 - move $a0, $s1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1777 xvmulwod.h.bu.b $xr0, $xr0, $xr1 xvst $xr0, $sp, 160 addi.d $a0, $sp, 128 @@ -731,7 +727,8 @@ main: # @main jirl $ra, $ra, 0 xvld $xr1, $sp, 96 # 32-byte Folded Reload xvst $xr1, $sp, 128 - lu52i.d $a0, $s1, 2046 + addi.w $a0, $zero, -1 + lu52i.d $a0, $a0, 2046 xvreplgr2vr.d $xr0, $a0 xvst $xr0, $sp, 32 # 32-byte Folded Spill xvmulwod.h.bu.b $xr0, $xr0, $xr1 @@ -1013,7 +1010,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -224 - ld.d $s1, $sp, 192 # 8-byte Folded Reload ld.d $s0, $sp, 200 # 8-byte Folded Reload ld.d $fp, $sp, 208 # 8-byte Folded Reload ld.d $ra, $sp, 216 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackev.dir/lasx-xvpackev.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackev.dir/lasx-xvpackev.s index c68ecf57..2f1675c3 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackev.dir/lasx-xvpackev.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackev.dir/lasx-xvpackev.s @@ -772,16 +772,11 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - move $a1, $a0 - lu32i.d $a1, -65281 - lu52i.d $a1, $a1, 15 - xvreplgr2vr.d $xr0, $a1 + xvldi $xr0, -1697 xvst $xr0, $sp, 96 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr1, $a0 - xvst $xr1, $sp, 32 # 32-byte Folded Spill xvrepli.b $xr0, -1 + xvldi $xr1, -1777 + xvst $xr1, $sp, 32 # 32-byte Folded Spill xvpackev.b $xr0, $xr1, $xr0 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackod.dir/lasx-xvpackod.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackod.dir/lasx-xvpackod.s index d591a3ed..0d43c95e 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackod.dir/lasx-xvpackod.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpackod.dir/lasx-xvpackod.s @@ -719,7 +719,6 @@ main: # @main st.d $ra, $sp, 280 # 8-byte Folded Spill st.d $fp, $sp, 272 # 8-byte Folded Spill st.d $s0, $sp, 264 # 8-byte Folded Spill - st.d $s1, $sp, 256 # 8-byte Folded Spill addi.d $fp, $sp, 288 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -745,8 +744,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_4) xvld $xr1, $a0, %pc_lo12(.LCPI2_4) xvst $xr0, $sp, 192 - ori $a0, $zero, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2545 xvpackod.b $xr0, $xr1, $xr0 xvst $xr0, $sp, 224 addi.d $a0, $sp, 192 @@ -813,9 +811,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $s1, 15 - ori $a0, $s1, 4095 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1789 xvst $xr0, $sp, 192 xvpackod.b $xr0, $xr0, $xr0 xvst $xr0, $sp, 224 @@ -946,9 +942,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, 
%call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 7 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2433 xvst $xr0, $sp, 192 lu12i.w $a0, 524272 lu32i.d $a0, -65535 @@ -1098,7 +1092,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_34) xvld $xr0, $a0, %pc_lo12(.LCPI2_34) xvst $xr0, $sp, 192 - xvreplgr2vr.w $xr0, $s1 + xvldi $xr0, -3600 xvld $xr1, $sp, 160 # 32-byte Folded Reload xvpackod.d $xr0, $xr1, $xr0 xvst $xr0, $sp, 224 @@ -1249,7 +1243,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -288 - ld.d $s1, $sp, 256 # 8-byte Folded Reload ld.d $s0, $sp, 264 # 8-byte Folded Reload ld.d $fp, $sp, 272 # 8-byte Folded Reload ld.d $ra, $sp, 280 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickev.dir/lasx-xvpickev.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickev.dir/lasx-xvpickev.s index c8f6fb3a..e014ee2e 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickev.dir/lasx-xvpickev.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickev.dir/lasx-xvpickev.s @@ -725,7 +725,6 @@ main: # @main st.d $ra, $sp, 248 # 8-byte Folded Spill st.d $fp, $sp, 240 # 8-byte Folded Spill st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill addi.d $fp, $sp, 256 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -803,10 +802,9 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_7) xvld $xr0, $a0, %pc_lo12(.LCPI2_7) xvst $xr0, $sp, 160 - lu12i.w $s1, -524288 - xvreplgr2vr.w $xr0, $s1 - xvrepli.b $xr1, -65 - xvpickev.b $xr0, $xr0, $xr1 + xvrepli.b $xr0, -65 + xvldi $xr1, -3200 + xvpickev.b $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 addi.d $a1, $sp, 192 @@ -850,9 +848,7 @@ main: # @main xvld $xr0, $a0, %pc_lo12(.LCPI2_11) xvst $xr0, $sp, 160 xvrepli.d $xr0, 1 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.d $xr1, $a0 + xvldi $xr1, -1789 xvst $xr1, $sp, 32 # 32-byte Folded Spill xvpickev.b $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 @@ -1143,7 +1139,8 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_35) xvld $xr0, $a0, %pc_lo12(.LCPI2_35) xvst $xr0, $sp, 160 - lu52i.d $a0, $s1, 2047 + lu12i.w $a0, -524288 + lu52i.d $a0, $a0, 2047 xvreplgr2vr.d $xr0, $a0 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvpickev.w $xr0, $xr0, $xr1 @@ -1196,7 +1193,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -256 - ld.d $s1, $sp, 224 # 8-byte Folded Reload ld.d $s0, $sp, 232 # 8-byte Folded Reload ld.d $fp, $sp, 240 # 8-byte Folded Reload ld.d $ra, $sp, 248 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickod.dir/lasx-xvpickod.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickod.dir/lasx-xvpickod.s index 7a1b856d..98f1aec4 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickod.dir/lasx-xvpickod.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickod.dir/lasx-xvpickod.s @@ -785,8 +785,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_8) xvld $xr1, $a0, %pc_lo12(.LCPI2_8) xvst $xr0, $sp, 224 - ori $a0, $zero, 1024 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2812 xvpickod.h $xr0, $xr1, $xr0 xvst $xr0, $sp, 256 addi.d $a0, $sp, 224 @@ -848,8 +847,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_17) xvld $xr0, $a0, %pc_lo12(.LCPI2_17) xvst $xr0, $sp, 224 - lu52i.d $a0, $zero, -1025 - xvreplgr2vr.d $xr0, $a0 + 
xvldi $xr0, -784 xvld $xr1, $sp, 160 # 32-byte Folded Reload xvpickod.h $xr0, $xr1, $xr0 xvst $xr0, $sp, 256 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickve.dir/lasx-xvpickve.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickve.dir/lasx-xvpickve.s index 722b6506..de8aab44 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickve.dir/lasx-xvpickve.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvpickve.dir/lasx-xvpickve.s @@ -397,8 +397,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_8) xvld $xr0, $a0, %pc_lo12(.LCPI2_8) xvst $xr0, $sp, 96 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1024 xvpickve.d $xr0, $xr0, 1 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplgr2vr.dir/lasx-xvreplgr2vr.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplgr2vr.dir/lasx-xvreplgr2vr.s index 6f2c8b08..b5f66283 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplgr2vr.dir/lasx-xvreplgr2vr.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplgr2vr.dir/lasx-xvreplgr2vr.s @@ -322,9 +322,10 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 + xvldi $xr0, -2812 + xvst $xr0, $sp, 224 ori $a0, $zero, 1024 xvreplgr2vr.h $xr0, $a0 - xvst $xr0, $sp, 224 xvst $xr0, $sp, 256 addi.d $a0, $sp, 224 addi.d $a1, $sp, 256 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplve.dir/lasx-xvreplve.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplve.dir/lasx-xvreplve.s index 06cb5444..e7d9298b 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplve.dir/lasx-xvreplve.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvreplve.dir/lasx-xvreplve.s @@ -1077,11 +1077,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 4080 - pcalau12i $a1, %pc_hi20(.LCPI2_17) - xvld $xr0, $a1, %pc_lo12(.LCPI2_17) - ori $a0, $a0, 255 - xvreplgr2vr.d $xr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_17) + xvld $xr0, $a0, %pc_lo12(.LCPI2_17) + xvldi $xr1, -1787 xvst $xr1, $sp, 160 xvreplve.d $xr0, $xr0, $zero xvst $xr0, $sp, 192 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotr.dir/lasx-xvrotr.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotr.dir/lasx-xvrotr.s index ea4096cc..828f51de 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotr.dir/lasx-xvrotr.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotr.dir/lasx-xvrotr.s @@ -801,7 +801,6 @@ main: # @main st.d $ra, $sp, 248 # 8-byte Folded Spill st.d $fp, $sp, 240 # 8-byte Folded Spill st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill addi.d $fp, $sp, 256 bstrins.d $sp, $zero, 4, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) @@ -837,8 +836,8 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $s1, 15 - ori $a0, $s1, 3805 + lu12i.w $a0, 15 + ori $a0, $a0, 3805 xvreplgr2vr.d $xr0, $a0 xvst $xr0, $sp, 160 addi.w $a0, $zero, -1 @@ -1117,8 +1116,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $a0, $s1, 
4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2305 xvst $xr0, $sp, 160 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvrotr.w $xr0, $xr0, $xr1 @@ -1276,7 +1274,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -256 - ld.d $s1, $sp, 224 # 8-byte Folded Reload ld.d $s0, $sp, 232 # 8-byte Folded Reload ld.d $fp, $sp, 240 # 8-byte Folded Reload ld.d $ra, $sp, 248 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotri.dir/lasx-xvrotri.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotri.dir/lasx-xvrotri.s index d922cf13..91059ace 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotri.dir/lasx-xvrotri.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvrotri.dir/lasx-xvrotri.s @@ -479,8 +479,7 @@ main: # @main lu52i.d $a0, $zero, -2048 xvreplgr2vr.d $xr0, $a0 xvst $xr0, $sp, 128 - lu52i.d $a0, $zero, 1024 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1024 xvrotri.b $xr0, $xr0, 7 xvst $xr0, $sp, 160 addi.d $a0, $sp, 128 @@ -716,8 +715,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3600 xvst $xr0, $sp, 128 xvrepli.w $xr0, 15 xvrotri.w $xr0, $xr0, 20 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-1.dir/lasx-xvsat-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-1.dir/lasx-xvsat-1.s index dac012b1..4ccf733a 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-1.dir/lasx-xvsat-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-1.dir/lasx-xvsat-1.s @@ -649,8 +649,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2545 xvst $xr0, $sp, 96 lu12i.w $a0, 3 ori $a0, $a0, 2452 @@ -861,11 +860,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 2047 - pcalau12i $a1, %pc_hi20(.LCPI2_20) - xvld $xr0, $a1, %pc_lo12(.LCPI2_20) - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_20) + xvld $xr0, $a0, %pc_lo12(.LCPI2_20) + xvldi $xr1, -2177 xvst $xr1, $sp, 96 xvsat.w $xr0, $xr0, 23 xvst $xr0, $sp, 128 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-2.dir/lasx-xvsat-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-2.dir/lasx-xvsat-2.s index 86bc3767..d43d590f 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-2.dir/lasx-xvsat-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsat-2.dir/lasx-xvsat-2.s @@ -684,13 +684,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 1 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2529 xvst $xr0, $sp, 96 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2305 xvsat.hu $xr0, $xr0, 12 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseq.dir/lasx-xvseq.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseq.dir/lasx-xvseq.s index 5286a6ab..1ca2a3a0 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseq.dir/lasx-xvseq.s +++ 
b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseq.dir/lasx-xvseq.s @@ -993,9 +993,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -1 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1552 xvst $xr0, $sp, 64 # 32-byte Folded Spill xvst $xr0, $sp, 192 xvrepli.b $xr1, -1 @@ -1151,8 +1149,7 @@ main: # @main xvld $xr0, $a0, %pc_lo12(.LCPI2_24) xvld $xr1, $sp, 160 # 32-byte Folded Reload xvst $xr1, $sp, 192 - lu12i.w $a0, -524288 - xvreplgr2vr.w $xr1, $a0 + xvldi $xr1, -3200 xvseq.h $xr0, $xr0, $xr1 xvst $xr0, $sp, 224 addi.d $a0, $sp, 192 @@ -1313,13 +1310,11 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - xvld $xr0, $sp, 64 # 32-byte Folded Reload - xvst $xr0, $sp, 192 pcalau12i $a0, %pc_hi20(.LCPI2_39) xvld $xr0, $a0, %pc_lo12(.LCPI2_39) - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.d $xr1, $a0 + xvld $xr1, $sp, 64 # 32-byte Folded Reload + xvst $xr1, $sp, 192 + xvldi $xr1, -1789 xvseq.w $xr0, $xr1, $xr0 xvst $xr0, $sp, 224 addi.d $a0, $sp, 192 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseqi.dir/lasx-xvseqi.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseqi.dir/lasx-xvseqi.s index 0e2d8045..fcf8222a 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseqi.dir/lasx-xvseqi.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvseqi.dir/lasx-xvseqi.s @@ -382,7 +382,6 @@ main: # @main st.d $ra, $sp, 184 # 8-byte Folded Spill st.d $fp, $sp, 176 # 8-byte Folded Spill st.d $s0, $sp, 168 # 8-byte Folded Spill - st.d $s1, $sp, 160 # 8-byte Folded Spill addi.d $fp, $sp, 192 bstrins.d $sp, $zero, 4, 0 xvrepli.b $xr0, 0 @@ -494,10 +493,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $sp, 96 - ori $s1, $zero, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -1 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1552 xvseqi.b $xr0, $xr0, -15 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 @@ -509,8 +505,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $sp, 96 - lu12i.w $a0, -4096 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1544 xvseqi.h $xr0, $xr0, -8 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 @@ -592,8 +587,9 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $sp, 96 - lu32i.d $s1, -32768 - xvreplgr2vr.d $xr0, $s1 + ori $a0, $zero, 0 + lu32i.d $a0, -32768 + xvreplgr2vr.d $xr0, $a0 xvseqi.h $xr0, $xr0, -11 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 @@ -815,8 +811,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $sp, 96 - lu52i.d $a0, $zero, -1025 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -784 xvseqi.d $xr0, $xr0, -3 xvst $xr0, $sp, 128 addi.d $a0, $sp, 96 @@ -863,7 +858,6 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero addi.d $sp, $fp, -192 - ld.d $s1, $sp, 160 # 8-byte Folded Reload ld.d $s0, $sp, 168 # 8-byte Folded Reload ld.d $fp, $sp, 176 # 8-byte Folded Reload ld.d $ra, $sp, 184 # 8-byte Folded Reload diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf4i_b.dir/lasx-xvshuf4i_b.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf4i_b.dir/lasx-xvshuf4i_b.s index b740ed7f..838cad20 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf4i_b.dir/lasx-xvshuf4i_b.s +++ 
b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf4i_b.dir/lasx-xvshuf4i_b.s @@ -636,13 +636,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -16 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1780 xvst $xr0, $sp, 160 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1777 xvshuf4i.h $xr0, $xr0, 167 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 @@ -847,13 +843,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 1 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2529 xvst $xr0, $sp, 160 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr1, $a0 + xvldi $xr1, -2305 xvshuf4i.d $xr0, $xr1, 5 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf_b.dir/lasx-xvshuf_b.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf_b.dir/lasx-xvshuf_b.s index 100dff81..8e8a39ee 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf_b.dir/lasx-xvshuf_b.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvshuf_b.dir/lasx-xvshuf_b.s @@ -1163,11 +1163,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - pcalau12i $a1, %pc_hi20(.LCPI2_21) - xvld $xr0, $a1, %pc_lo12(.LCPI2_21) - ori $a0, $a0, 2048 - xvreplgr2vr.h $xr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_21) + xvld $xr0, $a0, %pc_lo12(.LCPI2_21) + xvldi $xr1, -2568 xvst $xr1, $sp, 96 xvld $xr1, $sp, 64 # 32-byte Folded Reload xvshuf.h $xr1, $xr0, $xr0 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsigncov.dir/lasx-xvsigncov.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsigncov.dir/lasx-xvsigncov.s index a3fe79a8..75638829 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsigncov.dir/lasx-xvsigncov.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsigncov.dir/lasx-xvsigncov.s @@ -1124,13 +1124,11 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_3) xvld $xr0, $a0, %pc_lo12(.LCPI2_3) - xvst $xr0, $sp, 160 pcalau12i $a0, %pc_hi20(.LCPI2_4) - xvld $xr0, $a0, %pc_lo12(.LCPI2_4) - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr1, $a0 - xvsigncov.b $xr0, $xr1, $xr0 + xvld $xr1, $a0, %pc_lo12(.LCPI2_4) + xvst $xr0, $sp, 160 + xvldi $xr0, -2305 + xvsigncov.b $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 addi.d $a1, $sp, 192 @@ -1157,13 +1155,11 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_8) xvld $xr0, $a0, %pc_lo12(.LCPI2_8) - xvst $xr0, $sp, 160 pcalau12i $a0, %pc_hi20(.LCPI2_9) - xvld $xr0, $a0, %pc_lo12(.LCPI2_9) - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr1, $a0 - xvsigncov.b $xr0, $xr0, $xr1 + xvld $xr1, $a0, %pc_lo12(.LCPI2_9) + xvst $xr0, $sp, 160 + xvldi $xr0, -1777 + xvsigncov.b $xr0, $xr1, $xr0 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 addi.d $a1, $sp, 192 @@ -1272,13 +1268,11 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_18) xvld $xr0, $a0, %pc_lo12(.LCPI2_18) - xvst $xr0, $sp, 160 pcalau12i $a0, %pc_hi20(.LCPI2_19) - xvld $xr0, $a0, %pc_lo12(.LCPI2_19) - ori $a0, $zero, 0 - lu32i.d $a0, -1 - xvreplgr2vr.d $xr1, $a0 - xvsigncov.b $xr0, $xr1, $xr0 + xvld $xr1, $a0, %pc_lo12(.LCPI2_19) + xvst $xr0, $sp, 160 + xvldi $xr0, -1552 + 
xvsigncov.b $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 addi.d $a1, $sp, 192 @@ -1670,8 +1664,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr1, $sp, 128 # 32-byte Folded Reload xvst $xr1, $sp, 160 - lu12i.w $a0, 4080 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1788 xvsigncov.d $xr0, $xr0, $xr1 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsle-2.dir/lasx-xvsle-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsle-2.dir/lasx-xvsle-2.s index 874bf402..2bc1f943 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsle-2.dir/lasx-xvsle-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsle-2.dir/lasx-xvsle-2.s @@ -969,14 +969,10 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1777 xvst $xr0, $sp, 64 # 32-byte Folded Spill xvst $xr0, $sp, 224 - ori $a0, $zero, 0 - lu32i.d $a0, -1 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1552 xvld $xr1, $sp, 192 # 32-byte Folded Reload xvsle.wu $xr0, $xr0, $xr1 xvst $xr0, $sp, 256 @@ -1045,12 +1041,11 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 192 # 32-byte Folded Reload xvst $xr0, $sp, 224 - lu12i.w $a0, -4096 - xvreplgr2vr.d $xr0, $a0 lu12i.w $a0, 4096 lu32i.d $a0, 4 - xvreplgr2vr.d $xr1, $a0 - xvsle.du $xr0, $xr0, $xr1 + xvreplgr2vr.d $xr0, $a0 + xvldi $xr1, -1544 + xvsle.du $xr0, $xr1, $xr0 xvst $xr0, $sp, 256 addi.d $a0, $sp, 224 addi.d $a1, $sp, 256 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-1.dir/lasx-xvslei-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-1.dir/lasx-xvslei-1.s index 6bf422fd..3984bc6a 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-1.dir/lasx-xvslei-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-1.dir/lasx-xvslei-1.s @@ -716,9 +716,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -65536 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1600 xvst $xr0, $sp, 160 lu52i.d $a0, $zero, -2048 xvreplgr2vr.d $xr0, $a0 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-2.dir/lasx-xvslei-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-2.dir/lasx-xvslei-2.s index ceca1f6d..2741cf64 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-2.dir/lasx-xvslei-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslei-2.dir/lasx-xvslei-2.s @@ -1037,9 +1037,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr0, $sp, 128 # 32-byte Folded Reload xvst $xr0, $sp, 160 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - xvreplgr2vr.d $xr0, $a0 + xvldi $xr0, -1777 xvslei.du $xr0, $xr0, 16 xvst $xr0, $sp, 192 addi.d $a0, $sp, 160 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsll.dir/lasx-xvsll.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsll.dir/lasx-xvsll.s index 47db9a19..d610cb2a 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsll.dir/lasx-xvsll.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsll.dir/lasx-xvsll.s @@ -355,9 +355,7 @@ main: # @main move $a3, $s0 
pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2305 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslli.dir/lasx-xvslli.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslli.dir/lasx-xvslli.s index f162963f..56f32dc5 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslli.dir/lasx-xvslli.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslli.dir/lasx-xvslli.s @@ -524,9 +524,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 2048 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2568 xvst $xr0, $sp, 64 xvst $xr0, $sp, 96 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsllwil-1.dir/lasx-xvsllwil-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsllwil-1.dir/lasx-xvsllwil-1.s index a4613e6f..b910f084 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsllwil-1.dir/lasx-xvsllwil-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsllwil-1.dir/lasx-xvsllwil-1.s @@ -735,8 +735,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 4 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -3776 xvst $xr0, $sp, 128 xvrepli.h $xr0, 1 xvsllwil.w.h $xr0, $xr0, 14 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-1.dir/lasx-xvslt-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-1.dir/lasx-xvslt-1.s index e8fccd6d..c697599d 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-1.dir/lasx-xvslt-1.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-1.dir/lasx-xvslt-1.s @@ -569,9 +569,7 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - xvreplgr2vr.w $xr0, $a0 + xvldi $xr0, -2305 xvst $xr0, $sp, 192 xvld $xr1, $sp, 160 # 32-byte Folded Reload xvslt.b $xr0, $xr0, $xr1 @@ -667,8 +665,7 @@ main: # @main xvst $xr1, $sp, 64 # 32-byte Folded Spill xvld $xr0, $sp, 160 # 32-byte Folded Reload xvst $xr0, $sp, 192 - ori $a0, $zero, 1024 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2812 xvslt.h $xr0, $xr0, $xr1 xvst $xr0, $sp, 224 addi.d $a0, $sp, 192 @@ -785,11 +782,9 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(check_lasx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - pcalau12i $a1, %pc_hi20(.LCPI2_15) - xvld $xr0, $a1, %pc_lo12(.LCPI2_15) - lu32i.d $a0, 0 - xvreplgr2vr.d $xr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_15) + xvld $xr0, $a0, %pc_lo12(.LCPI2_15) + xvldi $xr1, -1777 xvst $xr1, $sp, 192 xvld $xr1, $sp, 160 # 32-byte Folded Reload xvslt.w $xr0, $xr1, $xr0 diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-2.dir/lasx-xvslt-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-2.dir/lasx-xvslt-2.s index 8e3e6790..fb13947a 100644 --- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-2.dir/lasx-xvslt-2.s +++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslt-2.dir/lasx-xvslt-2.s @@ -1075,8 +1075,7 @@ main: # @main jirl $ra, $ra, 0 xvld $xr1, $sp, 
160 # 32-byte Folded Reload
 xvst $xr1, $sp, 192
- lu52i.d $a0, $zero, -1025
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -784
 xvslt.wu $xr0, $xr0, $xr1
 xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-1.dir/lasx-xvslti-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-1.dir/lasx-xvslti-1.s
index 55cccd1e..621452f2 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-1.dir/lasx-xvslti-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-1.dir/lasx-xvslti-1.s
@@ -836,11 +836,9 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 0
- pcalau12i $a1, %pc_hi20(.LCPI2_12)
- xvld $xr0, $a1, %pc_lo12(.LCPI2_12)
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_12)
+ xvld $xr0, $a0, %pc_lo12(.LCPI2_12)
+ xvldi $xr1, -1552
 xvst $xr1, $sp, 160
 xvslti.w $xr0, $xr0, 2
 xvst $xr0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-2.dir/lasx-xvslti-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-2.dir/lasx-xvslti-2.s
index c5b33b6a..bacbebcf 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-2.dir/lasx-xvslti-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvslti-2.dir/lasx-xvslti-2.s
@@ -747,9 +747,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1552
 xvst $xr0, $sp, 160
 ori $a0, $zero, 512
 xvreplgr2vr.d $xr0, $a0
@@ -854,9 +852,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr0, $sp, 96 # 32-byte Folded Reload
 xvst $xr0, $sp, 160
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
 xvslti.du $xr0, $xr0, 17
 xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrai.dir/lasx-xvsrai.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrai.dir/lasx-xvsrai.s
index 7b0efafb..313dde48 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrai.dir/lasx-xvsrai.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrai.dir/lasx-xvsrai.s
@@ -338,9 +338,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
 xvst $xr0, $sp, 96
 xvst $xr0, $sp, 128
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsran.dir/lasx-xvsran.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsran.dir/lasx-xvsran.s
index b68c2b8f..4fe8f461 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsran.dir/lasx-xvsran.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsran.dir/lasx-xvsran.s
@@ -722,13 +722,11 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_19)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_19)
- xvst $xr0, $sp, 160
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_20)
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
- xvsran.h.w $xr0, $xr1, $xr0
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_20)
+ xvst $xr0, $sp, 160
+ xvldi $xr0, -1552
+ xvsran.h.w $xr0, $xr0, $xr1
 xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
 addi.d $a1, $sp, 192
@@ -828,9 +826,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_22)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_22)
 xvst $xr0, $sp, 160
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
 xvld $xr1, $sp, 128 # 32-byte Folded Reload
 xvsran.w.d $xr0, $xr0, $xr1
 xvst $xr0, $sp, 192
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrani.dir/lasx-xvsrani.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrani.dir/lasx-xvsrani.s
index ed3b7c63..bc2bfac9 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrani.dir/lasx-xvsrani.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrani.dir/lasx-xvsrani.s
@@ -1119,14 +1119,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_15)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_15)
- xvst $xr0, $sp, 160
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_16)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr1, $a0
- xvsrani.b.h $xr1, $xr0, 9
- xvst $xr1, $sp, 192
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_16)
+ xvst $xr0, $sp, 160
+ xvldi $xr0, -1789
+ xvsrani.b.h $xr0, $xr1, 9
+ xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
 addi.d $a1, $sp, 192
 ori $a2, $zero, 32
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrari.dir/lasx-xvsrari.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrari.dir/lasx-xvsrari.s
index 156f3c4d..8df7f306 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrari.dir/lasx-xvsrari.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrari.dir/lasx-xvsrari.s
@@ -872,8 +872,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 4
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3776
 xvst $xr0, $sp, 128
 xvrepli.h $xr0, 1
 xvsrari.w $xr0, $xr0, 2
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrarni.dir/lasx-xvsrarni.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrarni.dir/lasx-xvsrarni.s
index 351e5d77..f00015a5 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrarni.dir/lasx-xvsrarni.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrarni.dir/lasx-xvsrarni.s
@@ -1163,8 +1163,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_37)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_37)
 xvst $xr0, $sp, 96
- lu12i.w $a0, 260096
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -1424
 xvld $xr1, $sp, 64 # 32-byte Folded Reload
 xvsrarni.d.q $xr0, $xr1, 39
 xvst $xr0, $sp, 128
@@ -1247,14 +1246,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_45)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_45)
- xvst $xr0, $sp, 96
 pcalau12i $a0, %pc_hi20(.LCPI2_46)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_46)
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
- xvsrarni.d.q $xr1, $xr0, 11
- xvst $xr1, $sp, 128
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_46)
+ xvst $xr0, $sp, 96
+ xvldi $xr0, -1552
+ xvsrarni.d.q $xr0, $xr1, 11
+ xvst $xr0, $sp, 128
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 128
 ori $a2, $zero, 32
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrli.dir/lasx-xvsrli.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrli.dir/lasx-xvsrli.s
index 9228ed6f..2d10f919 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrli.dir/lasx-xvsrli.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrli.dir/lasx-xvsrli.s
@@ -537,9 +537,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 31
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2303
 xvst $xr0, $sp, 64
 xvst $xr0, $sp, 96
 addi.d $a0, $sp, 64
@@ -560,9 +558,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 3
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2497
 xvst $xr0, $sp, 64
 xvst $xr0, $sp, 96
 addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrln.dir/lasx-xvsrln.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrln.dir/lasx-xvsrln.s
index 1d2268cb..0fcb7e55 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrln.dir/lasx-xvsrln.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrln.dir/lasx-xvsrln.s
@@ -524,9 +524,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr1, $sp, 96 # 32-byte Folded Reload
 xvst $xr1, $sp, 128
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2305
 xvst $xr0, $sp, 32 # 32-byte Folded Spill
 xvsrln.b.h $xr0, $xr1, $xr0
 xvst $xr0, $sp, 160
@@ -732,8 +730,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr1, $sp, 96 # 32-byte Folded Reload
 xvst $xr1, $sp, 128
- lu12i.w $a0, 2048
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3456
 xvsrln.h.w $xr0, $xr1, $xr0
 xvst $xr0, $sp, 160
 addi.d $a0, $sp, 128
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlni.dir/lasx-xvsrlni.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlni.dir/lasx-xvsrlni.s
index f762230f..130fe9af 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlni.dir/lasx-xvsrlni.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlni.dir/lasx-xvsrlni.s
@@ -980,7 +980,6 @@ main: # @main
 st.d $ra, $sp, 216 # 8-byte Folded Spill
 st.d $fp, $sp, 208 # 8-byte Folded Spill
 st.d $s0, $sp, 200 # 8-byte Folded Spill
- st.d $s1, $sp, 192 # 8-byte Folded Spill
 addi.d $fp, $sp, 224
 bstrins.d $sp, $zero, 4, 0
 xvrepli.b $xr0, 0
@@ -1194,9 +1193,7 @@ main: # @main
 xvld $xr0, $a0, %pc_lo12(.LCPI2_23)
 xvst $xr0, $sp, 32 # 32-byte Folded Spill
 xvst $xr0, $sp, 128
- lu12i.w $a0, 3
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2497
 xvld $xr1, $sp, 96 # 32-byte Folded Reload
 xvsrlni.h.w $xr0, $xr1, 12
 xvst $xr0, $sp, 160
@@ -1224,14 +1221,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_26)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_26)
- xvst $xr0, $sp, 128
 pcalau12i $a0, %pc_hi20(.LCPI2_27)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_27)
- lu12i.w $a0, 4080
- ori $a0, $a0, 255
- xvreplgr2vr.d $xr1, $a0
- xvsrlni.h.w $xr0, $xr1, 7
- xvst $xr0, $sp, 160
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_27)
+ xvst $xr0, $sp, 128
+ xvldi $xr0, -1787
+ xvsrlni.h.w $xr1, $xr0, 7
+ xvst $xr1, $sp, 160
 addi.d $a0, $sp, 128
 addi.d $a1, $sp, 160
 ori $a2, $zero, 32
@@ -1369,12 +1364,8 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_41)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_41)
 xvst $xr0, $sp, 128
- ori $a0, $zero, 0
- ori $a1, $zero, 0
- lu32i.d $a1, -1
- xvreplgr2vr.d $xr0, $a1
- lu32i.d $a0, 65535
- xvreplgr2vr.d $xr1, $a0
+ xvldi $xr0, -1552
+ xvldi $xr1, -1744
 xvsrlni.w.d $xr1, $xr0, 45
 xvst $xr1, $sp, 160
 addi.d $a0, $sp, 128
@@ -1414,9 +1405,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr1, $sp, 96 # 32-byte Folded Reload
 xvst $xr1, $sp, 128
- lu12i.w $a0, 15
- ori $s1, $a0, 4095
- xvreplgr2vr.w $xr0, $s1
+ xvldi $xr0, -2305
 xvsrlni.d.q $xr1, $xr0, 115
 xvst $xr1, $sp, 160
 addi.d $a0, $sp, 128
@@ -1602,7 +1591,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_66)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_66)
 xvst $xr0, $sp, 128
- xvreplgr2vr.d $xr0, $s1
+ xvldi $xr0, -1789
 xvld $xr1, $sp, 64 # 32-byte Folded Reload
 xvsrlni.d.q $xr1, $xr0, 80
 xvst $xr1, $sp, 160
@@ -1631,7 +1620,6 @@ main: # @main
 jirl $ra, $ra, 0
 move $a0, $zero
 addi.d $sp, $fp, -224
- ld.d $s1, $sp, 192 # 8-byte Folded Reload
 ld.d $s0, $sp, 200 # 8-byte Folded Reload
 ld.d $fp, $sp, 208 # 8-byte Folded Reload
 ld.d $ra, $sp, 216 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlr.dir/lasx-xvsrlr.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlr.dir/lasx-xvsrlr.s
index 6b6c463a..3fd37479 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlr.dir/lasx-xvsrlr.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlr.dir/lasx-xvsrlr.s
@@ -1058,11 +1058,9 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 15
- pcalau12i $a1, %pc_hi20(.LCPI2_23)
- xvld $xr0, $a1, %pc_lo12(.LCPI2_23)
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr1, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_23)
+ xvld $xr0, $a0, %pc_lo12(.LCPI2_23)
+ xvldi $xr1, -1789
 xvst $xr1, $sp, 128
 xvsrlr.h $xr0, $xr1, $xr0
 xvst $xr0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlri.dir/lasx-xvsrlri.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlri.dir/lasx-xvsrlri.s
index 9eeab0eb..14b7c911 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlri.dir/lasx-xvsrlri.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlri.dir/lasx-xvsrlri.s
@@ -706,8 +706,7 @@ main: # @main
 lu12i.w $a0, 2048
 xvreplgr2vr.d $xr0, $a0
 xvst $xr0, $sp, 96
- lu12i.w $a0, 4080
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1788
 xvsrlri.b $xr0, $xr0, 1
 xvst $xr0, $sp, 128
 addi.d $a0, $sp, 96
@@ -932,8 +931,7 @@ main: # @main
 lu12i.w $a0, 4096
 xvreplgr2vr.d $xr0, $a0
 xvst $xr0, $sp, 96
- lu12i.w $a0, -4096
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1544
 xvsrlri.d $xr0, $xr0, 40
 xvst $xr0, $sp, 128
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrn.dir/lasx-xvsrlrn.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrn.dir/lasx-xvsrlrn.s
index 8f3911e4..fc95c420 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrn.dir/lasx-xvsrlrn.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrn.dir/lasx-xvsrlrn.s
@@ -489,8 +489,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr1, $sp, 128 # 32-byte Folded Reload
 xvst $xr1, $sp, 160
- lu12i.w $a0, 8
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2688
 xvsrlrn.b.h $xr0, $xr1, $xr0
 xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
@@ -516,9 +515,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_5)
 xvst $xr0, $sp, 160
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1789
 xvld $xr1, $sp, 128 # 32-byte Folded Reload
 xvsrlrn.b.h $xr0, $xr0, $xr1
 xvst $xr0, $sp, 192
@@ -625,9 +622,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr1, $sp, 128 # 32-byte Folded Reload
 xvst $xr1, $sp, 160
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
 xvsrlrn.h.w $xr0, $xr1, $xr0
 xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrni.dir/lasx-xvsrlrni.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrni.dir/lasx-xvsrlrni.s
index 06309df2..9b207c3d 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrni.dir/lasx-xvsrlrni.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsrlrni.dir/lasx-xvsrlrni.s
@@ -734,14 +734,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_4)
- xvst $xr0, $sp, 128
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_5)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr1, $a0
- xvsrlrni.b.h $xr0, $xr1, 14
- xvst $xr0, $sp, 160
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_5)
+ xvst $xr0, $sp, 128
+ xvldi $xr0, -1789
+ xvsrlrni.b.h $xr1, $xr0, 14
+ xvst $xr1, $sp, 160
 addi.d $a0, $sp, 128
 addi.d $a1, $sp, 160
 ori $a2, $zero, 32
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrani.dir/lasx-xvssrani.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrani.dir/lasx-xvssrani.s
index 3e80de31..ac980d04 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrani.dir/lasx-xvssrani.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrani.dir/lasx-xvssrani.s
@@ -2168,8 +2168,7 @@ main: # @main
 xvst $xr0, $sp, 224
 lu12i.w $a0, -64
 xvreplgr2vr.w $xr0, $a0
- ori $a0, $zero, 1024
- xvreplgr2vr.h $xr1, $a0
+ xvldi $xr1, -2812
 xvssrani.bu.h $xr1, $xr0, 0
 xvst $xr1, $sp, 256
 addi.d $a0, $sp, 224
@@ -2250,8 +2249,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 4
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2752
 xvst $xr0, $sp, 224
 lu12i.w $a0, 16448
 xvreplgr2vr.w $xr0, $a0
@@ -2569,9 +2567,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_111)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_111)
 xvst $xr0, $sp, 224
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1789
 xvld $xr1, $sp, 192 # 32-byte Folded Reload
 xvssrani.du.q $xr1, $xr0, 77
 xvst $xr1, $sp, 256
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarn.dir/lasx-xvssrarn.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarn.dir/lasx-xvssrarn.s
index c73f868a..8c8448e3 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarn.dir/lasx-xvssrarn.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarn.dir/lasx-xvssrarn.s
@@ -627,7 +627,6 @@ main: # @main
 st.d $ra, $sp, 280 # 8-byte Folded Spill
 st.d $fp, $sp, 272 # 8-byte Folded Spill
 st.d $s0, $sp, 264 # 8-byte Folded Spill
- st.d $s1, $sp, 256 # 8-byte Folded Spill
 addi.d $fp, $sp, 288
 bstrins.d $sp, $zero, 4, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
@@ -733,7 +732,6 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr0, $sp, 160 # 32-byte Folded Reload
 xvst $xr0, $sp, 192
- ori $s1, $zero, 0
 ori $a0, $zero, 0
 lu32i.d $a0, 1
 xvreplgr2vr.d $xr0, $a0
@@ -1063,10 +1061,9 @@ main: # @main
 xvld $xr0, $sp, 160 # 32-byte Folded Reload
 xvst $xr0, $sp, 192
 lu12i.w $a0, 15
- ori $a1, $a0, 4094
- xvreplgr2vr.d $xr0, $a1
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr1, $a0
+ ori $a0, $a0, 4094
+ xvreplgr2vr.d $xr0, $a0
+ xvldi $xr1, -2305
 xvst $xr1, $sp, 32 # 32-byte Folded Spill
 xvssrarn.bu.h $xr0, $xr0, $xr1
 xvst $xr0, $sp, 224
@@ -1106,10 +1103,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_27)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_27)
 xvst $xr0, $sp, 192
- ori $a0, $zero, 0
- lu32i.d $a0, 65536
- lu52i.d $a0, $a0, 1021
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -943
 xvssrarn.bu.h $xr0, $xr0, $xr0
 xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
@@ -1207,8 +1201,7 @@ main: # @main
 xvld $xr0, $a0, %pc_lo12(.LCPI2_33)
 xvld $xr1, $sp, 160 # 32-byte Folded Reload
 xvst $xr1, $sp, 192
- lu32i.d $s1, -1
- xvreplgr2vr.d $xr1, $s1
+ xvldi $xr1, -1552
 xvssrarn.hu.w $xr0, $xr1, $xr0
 xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
@@ -1447,7 +1440,6 @@ main: # @main
 jirl $ra, $ra, 0
 move $a0, $zero
 addi.d $sp, $fp, -288
- ld.d $s1, $sp, 256 # 8-byte Folded Reload
 ld.d $s0, $sp, 264 # 8-byte Folded Reload
 ld.d $fp, $sp, 272 # 8-byte Folded Reload
 ld.d $ra, $sp, 280 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarni.dir/lasx-xvssrarni.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarni.dir/lasx-xvssrarni.s
index 7bf138c2..2a2b4101 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarni.dir/lasx-xvssrarni.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrarni.dir/lasx-xvssrarni.s
@@ -1292,8 +1292,7 @@ main: # @main
 xvld $xr0, $a0, %pc_lo12(.LCPI2_6)
 xvld $xr1, $sp, 96 # 32-byte Folded Reload
 xvst $xr1, $sp, 128
- lu52i.d $a0, $zero, -16
- xvreplgr2vr.d $xr1, $a0
+ xvldi $xr1, -1664
 xvssrarni.b.h $xr0, $xr1, 14
 xvst $xr0, $sp, 160
 addi.d $a0, $sp, 128
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrln.dir/lasx-xvssrln.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrln.dir/lasx-xvssrln.s
index c511f948..30c5cbb5 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrln.dir/lasx-xvssrln.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrln.dir/lasx-xvssrln.s
@@ -857,7 +857,6 @@ main: # @main
 st.d $ra, $sp, 312 # 8-byte Folded Spill
 st.d $fp, $sp, 304 # 8-byte Folded Spill
 st.d $s0, $sp, 296 # 8-byte Folded Spill
- st.d $s1, $sp, 288 # 8-byte Folded Spill
 addi.d $fp, $sp, 320
 bstrins.d $sp, $zero, 4, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
@@ -881,10 +880,9 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_3)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_3)
 xvst $xr0, $sp, 224
- lu12i.w $a0, 2
- xvreplgr2vr.h $xr0, $a0
- xvrepli.b $xr1, 1
- xvssrln.b.h $xr0, $xr0, $xr1
+ xvrepli.b $xr0, 1
+ xvldi $xr1, -2784
+ xvssrln.b.h $xr0, $xr1, $xr0
 xvst $xr0, $sp, 256
 addi.d $a0, $sp, 224
 addi.d $a1, $sp, 256
@@ -1435,11 +1433,8 @@ main: # @main
 xvld $xr0, $a0, %pc_lo12(.LCPI2_53)
 pcalau12i $a0, %pc_hi20(.LCPI2_54)
 xvld $xr1, $a0, %pc_lo12(.LCPI2_54)
- addi.w $s1, $zero, -1
 xvst $xr0, $sp, 224
- move $a0, $s1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
 xvst $xr0, $sp, 32 # 32-byte Folded Spill
 xvssrln.bu.h $xr0, $xr1, $xr0
 xvst $xr0, $sp, 256
@@ -1454,7 +1449,8 @@ main: # @main
 xvst $xr0, $sp, 224
 lu52i.d $a0, $zero, 2047
 xvreplgr2vr.d $xr0, $a0
- lu52i.d $a0, $s1, 2046
+ addi.w $a0, $zero, -1
+ lu52i.d $a0, $a0, 2046
 xvreplgr2vr.d $xr1, $a0
 xvssrln.bu.h $xr0, $xr0, $xr1
 xvst $xr0, $sp, 256
@@ -1745,7 +1741,6 @@ main: # @main
 jirl $ra, $ra, 0
 move $a0, $zero
 addi.d $sp, $fp, -320
- ld.d $s1, $sp, 288 # 8-byte Folded Reload
 ld.d $s0, $sp, 296 # 8-byte Folded Reload
 ld.d $fp, $sp, 304 # 8-byte Folded Reload
 ld.d $ra, $sp, 312 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlni.dir/lasx-xvssrlni.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlni.dir/lasx-xvssrlni.s
index 5e06da5a..b6dd86b2 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlni.dir/lasx-xvssrlni.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlni.dir/lasx-xvssrlni.s
@@ -1511,14 +1511,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_7)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_7)
- xvst $xr0, $sp, 192
 pcalau12i $a0, %pc_hi20(.LCPI2_8)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_8)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr1, $a0
- xvssrlni.b.h $xr1, $xr0, 11
- xvst $xr1, $sp, 224
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_8)
+ xvst $xr0, $sp, 192
+ xvldi $xr0, -1789
+ xvssrlni.b.h $xr0, $xr1, 11
+ xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
 addi.d $a1, $sp, 224
 ori $a2, $zero, 32
@@ -1640,14 +1638,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_20)
- xvst $xr0, $sp, 192
 pcalau12i $a0, %pc_hi20(.LCPI2_21)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_21)
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
- xvssrlni.h.w $xr0, $xr1, 2
- xvst $xr0, $sp, 224
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_21)
+ xvst $xr0, $sp, 192
+ xvldi $xr0, -1552
+ xvssrlni.h.w $xr1, $xr0, 2
+ xvst $xr1, $sp, 224
 addi.d $a0, $sp, 192
 addi.d $a1, $sp, 224
 ori $a2, $zero, 32
@@ -1702,9 +1698,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 3
- ori $a0, $a0, 4095
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -2497
 xvst $xr0, $sp, 192
 xvrepli.d $xr0, -2
 xvssrlni.w.d $xr0, $xr0, 50
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrn.dir/lasx-xvssrlrn.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrn.dir/lasx-xvssrlrn.s
index 323adb58..2a64ba0d 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrn.dir/lasx-xvssrlrn.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrn.dir/lasx-xvssrlrn.s
@@ -874,13 +874,11 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_2)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_2)
- xvst $xr0, $sp, 192
 pcalau12i $a0, %pc_hi20(.LCPI2_3)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_3)
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr1, $a0
- xvssrlrn.b.h $xr0, $xr1, $xr0
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_3)
+ xvst $xr0, $sp, 192
+ xvldi $xr0, -1777
+ xvssrlrn.b.h $xr0, $xr0, $xr1
 xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
 addi.d $a1, $sp, 224
@@ -919,13 +917,11 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_6)
- xvst $xr0, $sp, 192
 pcalau12i $a0, %pc_hi20(.LCPI2_7)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_7)
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- xvreplgr2vr.d $xr1, $a0
- xvssrlrn.b.h $xr0, $xr0, $xr1
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_7)
+ xvst $xr0, $sp, 192
+ xvldi $xr0, -1552
+ xvssrlrn.b.h $xr0, $xr1, $xr0
 xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
 addi.d $a1, $sp, 224
@@ -1556,13 +1552,12 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr0, $sp, 96 # 32-byte Folded Reload
 xvst $xr0, $sp, 192
- ori $a0, $zero, 512
- xvreplgr2vr.w $xr0, $a0
 lu12i.w $a0, -16384
 lu32i.d $a0, -1024
 lu52i.d $a0, $a0, -161
- xvreplgr2vr.d $xr1, $a0
- xvssrlrn.wu.d $xr0, $xr0, $xr1
+ xvreplgr2vr.d $xr0, $a0
+ xvldi $xr1, -3838
+ xvssrlrn.wu.d $xr0, $xr1, $xr0
 xvst $xr0, $sp, 224
 addi.d $a0, $sp, 192
 addi.d $a1, $sp, 224
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrni.dir/lasx-xvssrlrni.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrni.dir/lasx-xvssrlrni.s
index d3566aa1..ed69a406 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrni.dir/lasx-xvssrlrni.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssrlrni.dir/lasx-xvssrlrni.s
@@ -1345,14 +1345,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_6)
- xvst $xr0, $sp, 192
 pcalau12i $a0, %pc_hi20(.LCPI2_7)
- xvld $xr0, $a0, %pc_lo12(.LCPI2_7)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- xvreplgr2vr.d $xr1, $a0
- xvssrlrni.bu.h $xr0, $xr1, 1
- xvst $xr0, $sp, 224
+ xvld $xr1, $a0, %pc_lo12(.LCPI2_7)
+ xvst $xr0, $sp, 192
+ xvldi $xr0, -1789
+ xvssrlrni.bu.h $xr1, $xr0, 1
+ xvst $xr1, $sp, 224
 addi.d $a0, $sp, 192
 addi.d $a1, $sp, 224
 ori $a2, $zero, 32
@@ -1513,8 +1511,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_21)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_21)
 xvst $xr0, $sp, 192
- lu12i.w $a0, 262400
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1278
 xvld $xr1, $sp, 128 # 32-byte Folded Reload
 xvssrlrni.wu.d $xr0, $xr1, 7
 xvst $xr0, $sp, 224
@@ -1566,8 +1563,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_25)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_25)
 xvst $xr0, $sp, 192
- ori $a0, $zero, 1024
- xvreplgr2vr.h $xr0, $a0
+ xvldi $xr0, -2812
 xvld $xr1, $sp, 160 # 32-byte Folded Reload
 xvssrlrni.wu.d $xr0, $xr1, 51
 xvst $xr0, $sp, 224
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssub-1.dir/lasx-xvssub-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssub-1.dir/lasx-xvssub-1.s
index 7f5387ff..544d40d7 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssub-1.dir/lasx-xvssub-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvssub-1.dir/lasx-xvssub-1.s
@@ -943,8 +943,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_32)
 xvld $xr1, $a0, %pc_lo12(.LCPI2_32)
 xvst $xr0, $sp, 96
- lu12i.w $a0, -524288
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3200
 xvssub.d $xr0, $xr1, $xr0
 xvst $xr0, $sp, 128
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-1.dir/lasx-xvsubwev-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-1.dir/lasx-xvsubwev-1.s
index 1e8c95cd..4bccfbe7 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-1.dir/lasx-xvsubwev-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-1.dir/lasx-xvsubwev-1.s
@@ -891,8 +891,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 xvld $xr1, $a0, %pc_lo12(.LCPI2_16)
 xvst $xr0, $sp, 160
- lu12i.w $a0, -4096
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1544
 xvsubwev.w.h $xr0, $xr0, $xr1
 xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
@@ -932,8 +931,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvld $xr1, $sp, 128 # 32-byte Folded Reload
 xvst $xr1, $sp, 160
- lu52i.d $a0, $zero, -1025
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -784
 xvsubwev.w.h $xr0, $xr0, $xr1
 xvst $xr0, $sp, 192
 addi.d $a0, $sp, 160
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-2.dir/lasx-xvsubwev-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-2.dir/lasx-xvsubwev-2.s
index 1045d2e7..cd9ba764 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-2.dir/lasx-xvsubwev-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwev-2.dir/lasx-xvsubwev-2.s
@@ -1038,8 +1038,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_21)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_21)
 xvst $xr0, $sp, 96
- lu52i.d $a0, $zero, 1024
- xvreplgr2vr.d $xr1, $a0
+ xvldi $xr1, -1024
 xvsubwev.w.hu $xr0, $xr0, $xr1
 xvst $xr0, $sp, 128
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-1.dir/lasx-xvsubwod-1.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-1.dir/lasx-xvsubwod-1.s
index b4712421..74883ec2 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-1.dir/lasx-xvsubwod-1.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-1.dir/lasx-xvsubwod-1.s
@@ -1425,8 +1425,7 @@ main: # @main
 jirl $ra, $ra, 0
 xvrepli.d $xr0, -512
 xvst $xr0, $sp, 224
- ori $a0, $zero, 512
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3838
 xvld $xr1, $sp, 192 # 32-byte Folded Reload
 xvsubwod.d.w $xr0, $xr1, $xr0
 xvst $xr0, $sp, 256
@@ -1527,8 +1526,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_51)
 xvld $xr0, $a0, %pc_lo12(.LCPI2_51)
 xvst $xr0, $sp, 224
- lu12i.w $a0, -524288
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3200
 xvld $xr1, $sp, 192 # 32-byte Folded Reload
 xvsubwod.q.d $xr0, $xr0, $xr1
 xvst $xr0, $sp, 256
diff --git a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-2.dir/lasx-xvsubwod-2.s b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-2.dir/lasx-xvsubwod-2.s
index cbbcc3c5..b8b554ac 100644
--- a/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-2.dir/lasx-xvsubwod-2.s
+++ b/results/SingleSource/UnitTests/Vector/LASX/CMakeFiles/Vector-LASX-lasx-xvsubwod-2.dir/lasx-xvsubwod-2.s
@@ -903,11 +903,10 @@ main: # @main
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
 lu12i.w $a0, 15
- ori $a1, $a0, 3841
- xvreplgr2vr.w $xr0, $a1
- xvst $xr0, $sp, 192
- ori $a0, $a0, 4095
+ ori $a0, $a0, 3841
 xvreplgr2vr.w $xr0, $a0
+ xvst $xr0, $sp, 192
+ xvldi $xr0, -2305
 xvld $xr1, $sp, 160 # 32-byte Folded Reload
 xvsubwod.h.bu $xr0, $xr1, $xr0
 xvst $xr0, $sp, 224
@@ -1171,9 +1170,7 @@ main: # @main
 move $a3, $s0
 pcaddu18i $ra, %call36(check_lasx_out)
 jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- xvreplgr2vr.d $xr0, $a0
+ xvldi $xr0, -1777
 xvst $xr0, $sp, 128 # 32-byte Folded Spill
 xvst $xr0, $sp, 192
 xvld $xr0, $sp, 96 # 32-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-1.dir/lsx-vabsd-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-1.dir/lsx-vabsd-1.s
index 84b1e832..c3058eb5 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-1.dir/lsx-vabsd-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-1.dir/lsx-vabsd-1.s
@@ -481,10 +481,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -128
- st.d $ra, $sp, 120 # 8-byte Folded Spill
- st.d $fp, $sp, 112 # 8-byte Folded Spill
- st.d $s0, $sp, 104 # 8-byte Folded Spill
+ addi.d $sp, $sp, -112
+ st.d $ra, $sp, 104 # 8-byte Folded Spill
+ st.d $fp, $sp, 96 # 8-byte Folded Spill
 vrepli.b $vr0, 0
 vst $vr0, $sp, 48 # 16-byte Folded Spill
 vst $vr0, $sp, 64
@@ -700,11 +699,8 @@ main: # @main
 ori $a0, $a0, 1
 lu32i.d $a0, 1
 vreplgr2vr.d $vr0, $a0
- addi.w $s0, $zero, -1
 vst $vr0, $sp, 64
- move $a0, $s0
- lu32i.d $a0, 65535
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1729
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vabsd.h $vr0, $vr1, $vr0
 vst $vr0, $sp, 80
@@ -747,8 +743,9 @@ main: # @main
 vst $vr0, $sp, 64
 pcalau12i $a0, %pc_hi20(.LCPI2_22)
 vld $vr0, $a0, %pc_lo12(.LCPI2_22)
- lu32i.d $s0, -65536
- lu52i.d $a0, $s0, 3
+ addi.w $a0, $zero, -1
+ lu32i.d $a0, -65536
+ lu52i.d $a0, $a0, 3
 vreplgr2vr.d $vr1, $a0
 vabsd.h $vr0, $vr1, $vr0
 vst $vr0, $sp, 80
@@ -884,10 +881,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 104 # 8-byte Folded Reload
- ld.d $fp, $sp, 112 # 8-byte Folded Reload
- ld.d $ra, $sp, 120 # 8-byte Folded Reload
- addi.d $sp, $sp, 128
+ ld.d $fp, $sp, 96 # 8-byte Folded Reload
+ ld.d $ra, $sp, 104 # 8-byte Folded Reload
+ addi.d $sp, $sp, 112
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-2.dir/lsx-vabsd-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-2.dir/lsx-vabsd-2.s
index 7be0c8ec..c6dd9fe1 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-2.dir/lsx-vabsd-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vabsd-2.dir/lsx-vabsd-2.s
@@ -755,8 +755,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, -4096
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3073
 vst $vr0, $sp, 64
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vabsd.hu $vr0, $vr1, $vr0
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vadda.dir/lsx-vadda.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vadda.dir/lsx-vadda.s
index e92b8994..d2e021ba 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vadda.dir/lsx-vadda.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vadda.dir/lsx-vadda.s
@@ -653,8 +653,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 8
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3712
 vst $vr0, $sp, 64
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vadda.b $vr0, $vr1, $vr0
@@ -685,11 +684,10 @@ main: # @main
 lu12i.w $a0, -523296
 vreplgr2vr.w $vr0, $a0
 vst $vr0, $sp, 64
- lu12i.w $a0, 1008
- vreplgr2vr.w $vr0, $a0
 lu12i.w $a0, 524272
- vreplgr2vr.w $vr1, $a0
- vadda.h $vr0, $vr1, $vr0
+ vreplgr2vr.w $vr0, $a0
+ vldi $vr1, -3521
+ vadda.h $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
 addi.d $a1, $sp, 80
@@ -887,8 +885,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_30)
 vld $vr1, $a0, %pc_lo12(.LCPI2_30)
 vst $vr0, $sp, 64
- lu12i.w $a0, 40960
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3318
 vadda.w $vr0, $vr1, $vr0
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-1.dir/lsx-vaddwev-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-1.dir/lsx-vaddwev-1.s
index 693214d5..3187b95b 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-1.dir/lsx-vaddwev-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-1.dir/lsx-vaddwev-1.s
@@ -502,8 +502,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vst $vr1, $sp, 64
- ori $a0, $zero, 2048
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2808
 vaddwev.h.b $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
@@ -567,10 +566,10 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
+ vldi $vr0, -1552
+ vst $vr0, $sp, 64
 ori $a0, $zero, 0
 lu32i.d $a0, -1
- vreplgr2vr.d $vr0, $a0
- vst $vr0, $sp, 64
 lu52i.d $a0, $a0, 3
 vreplgr2vr.d $vr0, $a0
 vld $vr1, $sp, 48 # 16-byte Folded Reload
@@ -652,9 +651,7 @@ main: # @main
 vrepli.b $vr0, -1
 vst $vr0, $sp, 32 # 16-byte Folded Spill
 vst $vr0, $sp, 64
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1777
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vaddwev.d.w $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-2.dir/lsx-vaddwev-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-2.dir/lsx-vaddwev-2.s
index d0e112cb..f2c2de87 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-2.dir/lsx-vaddwev-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-2.dir/lsx-vaddwev-2.s
@@ -485,8 +485,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vst $vr1, $sp, 96
- ori $a0, $zero, 1024
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2812
 vaddwev.h.bu $vr0, $vr0, $vr1
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
@@ -525,8 +524,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vst $vr1, $sp, 96
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vaddwev.h.bu $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
@@ -662,9 +660,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -2305
 vst $vr0, $sp, 96
 vld $vr0, $sp, 64 # 16-byte Folded Reload
 vld $vr1, $sp, 80 # 16-byte Folded Reload
@@ -703,9 +699,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1777
 vst $vr0, $sp, 96
 vld $vr0, $sp, 64 # 16-byte Folded Reload
 vld $vr1, $sp, 80 # 16-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-3.dir/lsx-vaddwev-3.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-3.dir/lsx-vaddwev-3.s
index 7d999d39..a54b9cad 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-3.dir/lsx-vaddwev-3.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwev-3.dir/lsx-vaddwev-3.s
@@ -945,9 +945,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 15
- ori $a0, $a0, 3840
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3585
 vst $vr0, $sp, 64
 vrepli.h $vr0, -256
 vld $vr1, $sp, 48 # 16-byte Folded Reload
@@ -1071,11 +1069,9 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- pcalau12i $a1, %pc_hi20(.LCPI2_40)
- vld $vr0, $a1, %pc_lo12(.LCPI2_40)
- lu32i.d $a0, 0
- vreplgr2vr.d $vr1, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_40)
+ vld $vr0, $a0, %pc_lo12(.LCPI2_40)
+ vldi $vr1, -1777
 vst $vr1, $sp, 64
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vaddwev.d.wu.w $vr0, $vr0, $vr1
@@ -1189,9 +1185,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 4080
- ori $a0, $a0, 255
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1787
 vst $vr0, $sp, 64
 vrepli.h $vr0, 255
 vld $vr1, $sp, 48 # 16-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-1.dir/lsx-vaddwod-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-1.dir/lsx-vaddwod-1.s
index 652e0b1f..edb067dd 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-1.dir/lsx-vaddwod-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-1.dir/lsx-vaddwod-1.s
@@ -660,11 +660,9 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 0
- ori $a1, $zero, 0
- lu32i.d $a1, 65535
- vreplgr2vr.d $vr0, $a1
+ vldi $vr0, -1744
 vst $vr0, $sp, 96
+ ori $a0, $zero, 0
 lu32i.d $a0, -1
 lu52i.d $a0, $a0, 3
 vreplgr2vr.d $vr0, $a0
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-3.dir/lsx-vaddwod-3.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-3.dir/lsx-vaddwod-3.s
index aab277ec..2ec0b801 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-3.dir/lsx-vaddwod-3.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vaddwod-3.dir/lsx-vaddwod-3.s
@@ -459,20 +459,16 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -128
- st.d $ra, $sp, 120 # 8-byte Folded Spill
- st.d $fp, $sp, 112 # 8-byte Folded Spill
- st.d $s0, $sp, 104 # 8-byte Folded Spill
+ addi.d $sp, $sp, -112
+ st.d $ra, $sp, 104 # 8-byte Folded Spill
+ st.d $fp, $sp, 96 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
- vld $vr0, $a0, %pc_lo12(.LCPI2_0)
- vst $vr0, $sp, 32 # 16-byte Folded Spill
- vst $vr0, $sp, 64
+ vld $vr1, $a0, %pc_lo12(.LCPI2_0)
+ vst $vr1, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr0, $a0, %pc_lo12(.LCPI2_1)
- ori $s0, $zero, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr1, $a0
+ vst $vr1, $sp, 64
+ vldi $vr1, -1552
 vaddwod.h.bu.b $vr0, $vr1, $vr0
 vst $vr0, $sp, 80
 pcalau12i $a0, %pc_hi20(.L.str.5)
@@ -612,11 +608,9 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 15
- pcalau12i $a1, %pc_hi20(.LCPI2_16)
- vld $vr0, $a1, %pc_lo12(.LCPI2_16)
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr1, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_16)
+ vld $vr0, $a0, %pc_lo12(.LCPI2_16)
+ vldi $vr1, -2305
 vst $vr1, $sp, 64
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vaddwod.w.hu.h $vr0, $vr0, $vr1
@@ -798,8 +792,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_32)
 vld $vr1, $a0, %pc_lo12(.LCPI2_32)
 vst $vr0, $sp, 64
- lu32i.d $s0, -65536
- vreplgr2vr.d $vr0, $s0
+ vldi $vr0, -1600
 vaddwod.q.du.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
@@ -810,10 +803,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 104 # 8-byte Folded Reload
- ld.d $fp, $sp, 112 # 8-byte Folded Reload
- ld.d $ra, $sp, 120 # 8-byte Folded Reload
- addi.d $sp, $sp, 128
+ ld.d $fp, $sp, 96 # 8-byte Folded Reload
+ ld.d $ra, $sp, 104 # 8-byte Folded Reload
+ addi.d $sp, $sp, 112
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vand.dir/lsx-vand.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vand.dir/lsx-vand.s
index 41f59548..1f82abea 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vand.dir/lsx-vand.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vand.dir/lsx-vand.s
@@ -290,9 +290,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 511
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -2273
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavg-1.dir/lsx-vavg-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavg-1.dir/lsx-vavg-1.s
index fe01c2ff..29f9084b 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavg-1.dir/lsx-vavg-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavg-1.dir/lsx-vavg-1.s
@@ -504,10 +504,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -128
- st.d $ra, $sp, 120 # 8-byte Folded Spill
- st.d $fp, $sp, 112 # 8-byte Folded Spill
- st.d $s0, $sp, 104 # 8-byte Folded Spill
+ addi.d $sp, $sp, -112
+ st.d $ra, $sp, 104 # 8-byte Folded Spill
+ st.d $fp, $sp, 96 # 8-byte Folded Spill
 vrepli.b $vr0, 0
 vst $vr0, $sp, 48 # 16-byte Folded Spill
 vst $vr0, $sp, 64
@@ -822,9 +821,9 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $s0, $zero, 0
- lu32i.d $s0, -1
- lu52i.d $a0, $s0, 3
+ ori $a0, $zero, 0
+ lu32i.d $a0, -1
+ lu52i.d $a0, $a0, 3
 vreplgr2vr.d $vr0, $a0
 vst $vr0, $sp, 64
 vavg.w $vr0, $vr0, $vr0
@@ -847,8 +846,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 8
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3712
 vst $vr0, $sp, 64
 vrepli.h $vr0, 1
 vld $vr1, $sp, 48 # 16-byte Folded Reload
@@ -1039,8 +1037,7 @@ main: # @main
 vst $vr0, $sp, 64
 lu12i.w $a0, 522240
 vreplgr2vr.w $vr0, $a0
- lu12i.w $a0, 262144
- vreplgr2vr.d $vr1, $a0
+ vldi $vr1, -1280
 vavg.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
@@ -1085,7 +1082,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_53)
 vld $vr1, $a0, %pc_lo12(.LCPI2_53)
 vst $vr0, $sp, 64
- vreplgr2vr.d $vr0, $s0
+ vldi $vr0, -1552
 vavg.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
@@ -1096,10 +1093,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 104 # 8-byte Folded Reload
- ld.d $fp, $sp, 112 # 8-byte Folded Reload
- ld.d $ra, $sp, 120 # 8-byte Folded Reload
- addi.d $sp, $sp, 128
+ ld.d $fp, $sp, 96 # 8-byte Folded Reload
+ ld.d $ra, $sp, 104 # 8-byte Folded Reload
+ addi.d $sp, $sp, 112
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-1.dir/lsx-vavgr-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-1.dir/lsx-vavgr-1.s
index e20deea6..6b0e7498 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-1.dir/lsx-vavgr-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-1.dir/lsx-vavgr-1.s
@@ -773,8 +773,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_25)
 vld $vr0, $a0, %pc_lo12(.LCPI2_25)
 vst $vr1, $sp, 80
- lu12i.w $a0, 479232
- vreplgr2vr.w $vr1, $a0
+ vldi $vr1, -3211
 vavgr.w $vr0, $vr0, $vr1
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
@@ -832,8 +831,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 261120
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -1416
 vst $vr0, $sp, 80
 lu12i.w $a0, 522240
 vreplgr2vr.w $vr0, $a0
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-2.dir/lsx-vavgr-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-2.dir/lsx-vavgr-2.s
index 3b5e9f94..7248ae68 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-2.dir/lsx-vavgr-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vavgr-2.dir/lsx-vavgr-2.s
@@ -534,9 +534,7 @@ main: # @main
 ori $a0, $a0, 128
 vreplgr2vr.w $vr0, $a0
 vst $vr0, $sp, 64
- lu12i.w $a0, 3
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -2497
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vavgr.bu $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitclri.dir/lsx-vbitclri.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitclri.dir/lsx-vbitclri.s
index 7c24efcb..50ee9be4 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitclri.dir/lsx-vbitclri.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitclri.dir/lsx-vbitclri.s
@@ -480,8 +480,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrev.dir/lsx-vbitrev.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrev.dir/lsx-vbitrev.s
index cab6c7b6..052e0d0f 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrev.dir/lsx-vbitrev.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrev.dir/lsx-vbitrev.s
@@ -470,8 +470,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 8
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2688
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrevi.dir/lsx-vbitrevi.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrevi.dir/lsx-vbitrevi.s
index 40efedb2..fde82a92 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrevi.dir/lsx-vbitrevi.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitrevi.dir/lsx-vbitrevi.s
@@ -297,18 +297,16 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -80
- st.d $ra, $sp, 72 # 8-byte Folded Spill
- st.d $fp, $sp, 64 # 8-byte Folded Spill
- st.d $s0, $sp, 56 # 8-byte Folded Spill
- st.d $s1, $sp, 48 # 8-byte Folded Spill
+ addi.d $sp, $sp, -48
+ st.d $ra, $sp, 40 # 8-byte Folded Spill
+ st.d $fp, $sp, 32 # 8-byte Folded Spill
 vrepli.b $vr0, 2
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 22
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 27
 move $a3, $fp
@@ -316,10 +314,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr0, $a0, %pc_lo12(.LCPI2_1)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 32
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.b $vr0, 64
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 37
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.b $vr0, 8
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 42
 move $a3, $fp
@@ -361,20 +359,20 @@ main: # @main
 lu32i.d $a0, 263428
 lu52i.d $a0, $a0, 64
 vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 47
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.b $vr0, -65
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 52
 move $a3, $fp
@@ -382,20 +380,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_2)
 vld $vr0, $a0, %pc_lo12(.LCPI2_2)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 57
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.b $vr0, -128
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 62
 move $a3, $fp
@@ -403,10 +401,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_3)
 vld $vr0, $a0, %pc_lo12(.LCPI2_3)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 67
 move $a3, $fp
@@ -414,21 +412,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 vld $vr0, $a0, %pc_lo12(.LCPI2_4)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 72
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 4
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2752
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 77
 move $a3, $fp
@@ -436,21 +433,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
 vld $vr0, $a0, %pc_lo12(.LCPI2_5)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 82
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 1024
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2812
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 87
 move $a3, $fp
@@ -458,10 +454,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
 vld $vr0, $a0, %pc_lo12(.LCPI2_6)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 92
 move $a3, $fp
@@ -469,20 +465,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_7)
 vld $vr0, $a0, %pc_lo12(.LCPI2_7)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 97
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.h $vr0, 64
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 102
 move $a3, $fp
@@ -490,10 +486,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_8)
 vld $vr0, $a0, %pc_lo12(.LCPI2_8)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 107
 move $a3, $fp
@@ -501,10 +497,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_9)
 vld $vr0, $a0, %pc_lo12(.LCPI2_9)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 112
 move $a3, $fp
@@ -514,32 +510,30 @@ main: # @main
 lu32i.d $a0, 16400
 lu52i.d $a0, $a0, 1916
 vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 117
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $s1, 1
- vreplgr2vr.h $vr0, $s1
+ vldi $vr0, -2800
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 122
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $s0, 2
- vreplgr2vr.h $vr0, $s0
+ vldi $vr0, -2784
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 127
 move $a3, $fp
@@ -547,10 +541,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_10)
 vld $vr0, $a0, %pc_lo12(.LCPI2_10)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 132
 move $a3, $fp
@@ -558,20 +552,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_11)
 vld $vr0, $a0, %pc_lo12(.LCPI2_11)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 137
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.w $vr0, 1
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 142
 move $a3, $fp
@@ -579,21 +573,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_12)
 vld $vr0, $a0, %pc_lo12(.LCPI2_12)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 147
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 256
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3568
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 152
 move $a3, $fp
@@ -601,10 +594,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_13)
 vld $vr0, $a0, %pc_lo12(.LCPI2_13)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 157
 move $a3, $fp
@@ -612,31 +605,32 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_14)
 vld $vr0, $a0, %pc_lo12(.LCPI2_14)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 162
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu32i.d $s1, -4097
- vreplgr2vr.d $vr0, $s1
+ lu12i.w $a0, 1
+ lu32i.d $a0, -4097
+ vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 167
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.w $vr0, 256
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 172
 move $a3, $fp
@@ -644,10 +638,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_15)
 vld $vr0, $a0, %pc_lo12(.LCPI2_15)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 177
 move $a3, $fp
@@ -655,20 +649,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 vld $vr0, $a0, %pc_lo12(.LCPI2_16)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 182
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.w $vr0, -129
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 187
 move $a3, $fp
@@ -677,10 +671,10 @@ main: # @main
 addi.w $a0, $zero, -1
 lu52i.d $a0, $a0, -17
 vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 192
 move $a3, $fp
@@ -688,10 +682,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_17)
 vld $vr0, $a0, %pc_lo12(.LCPI2_17)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 197
 move $a3, $fp
@@ -702,21 +696,20 @@ main: # @main
 lu32i.d $a0, -522232
 lu52i.d $a0, $a0, 128
 vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 202
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 262144
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1280
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 207
 move $a3, $fp
@@ -724,20 +717,20 @@ main: # @main
 jirl $ra, $ra, 0
 lu12i.w $a0, 32
 vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 212
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.d $vr0, 256
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 217
 move $a3, $fp
@@ -746,20 +739,21 @@ main: # @main
 ori $a0, $zero, 0
 lu32i.d $a0, 2
 vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 222
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vreplgr2vr.d $vr0, $s0
+ lu12i.w $a0, 2
+ vreplgr2vr.d $vr0, $a0
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 227
 move $a3, $fp
@@ -767,10 +761,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_18)
 vld $vr0, $a0, %pc_lo12(.LCPI2_18)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 232
 move $a3, $fp
@@ -778,10 +772,10 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_19)
 vld $vr0, $a0, %pc_lo12(.LCPI2_19)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 237
 move $a3, $fp
@@ -789,21 +783,19 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
 vld $vr0, $a0, %pc_lo12(.LCPI2_20)
+ vst $vr0, $sp, 0
 vst $vr0, $sp, 16
- vst $vr0, $sp, 32
- addi.d $a0, $sp, 16
- addi.d $a1, $sp, 32
+ addi.d $a0, $sp, 0
+ addi.d $a1, $sp, 16
 ori $a2, $zero, 16
 ori $a4, $zero, 242
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s1, $sp, 48 # 8-byte Folded Reload
- ld.d $s0, $sp, 56 # 8-byte Folded Reload
- ld.d $fp, $sp, 64 # 8-byte Folded Reload
- ld.d $ra, $sp, 72 # 8-byte Folded Reload
- addi.d $sp, $sp, 80
+ ld.d $fp, $sp, 32 # 8-byte Folded Reload
+ ld.d $ra, $sp, 40 # 8-byte Folded Reload
+ addi.d $sp, $sp, 48
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitseti.dir/lsx-vbitseti.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitseti.dir/lsx-vbitseti.s
index 7f1b7555..7fbfd297 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitseti.dir/lsx-vbitseti.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vbitseti.dir/lsx-vbitseti.s
@@ -401,8 +401,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
@@ -412,8 +411,7 @@ main: # @main
- ori $a0, $zero, 2048
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2808
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
@@ -434,8 +432,8 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $s0, 4096
- ori $a0, $s0, 257
+ lu12i.w $a0, 4096
+ ori $a0, $a0, 257
 lu32i.d $a0, 256
 lu52i.d $a0, $a0, 16
 vreplgr2vr.d $vr0, $a0
@@ -459,8 +457,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 16
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3583
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
@@ -470,8 +467,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 64
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3580
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
@@ -514,7 +510,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vreplgr2vr.w $vr0, $s0
+ vldi $vr0, -3327
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vdiv-1.dir/lsx-vdiv-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vdiv-1.dir/lsx-vdiv-1.s
index 64131780..969e7f35 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vdiv-1.dir/lsx-vdiv-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vdiv-1.dir/lsx-vdiv-1.s
@@ -539,8 +539,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, -524288
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3200
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vexth-2.dir/lsx-vexth-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vexth-2.dir/lsx-vexth-2.s
index 3da24c32..ad37cee9 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vexth-2.dir/lsx-vexth-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vexth-2.dir/lsx-vexth-2.s
@@ -505,9 +505,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1777
 vst $vr0, $sp, 48
 vrepli.b $vr0, -1
 vexth.du.wu $vr0, $vr0
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vextrins.dir/lsx-vextrins.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vextrins.dir/lsx-vextrins.s
index 31337949..ac30e97f 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vextrins.dir/lsx-vextrins.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vextrins.dir/lsx-vextrins.s
@@ -933,8 +933,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_30)
 vld $vr0, $a0, %pc_lo12(.LCPI2_30)
 vst $vr0, $sp, 112
- ori $a0, $zero, 512
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2814
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vextrins.w $vr1, $vr0, 3
 vst $vr1, $sp, 128
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_d.dir/lsx-vfadd_d.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_d.dir/lsx-vfadd_d.s
index caf4fc5a..26bee46d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_d.dir/lsx-vfadd_d.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_d.dir/lsx-vfadd_d.s
@@ -322,8 +322,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vst $vr0, $sp, 80
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_s.dir/lsx-vfadd_s.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_s.dir/lsx-vfadd_s.s
index 8891381b..5b5eb0c2 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_s.dir/lsx-vfadd_s.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfadd_s.dir/lsx-vfadd_s.s
@@ -573,8 +573,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 260096
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -1424
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_d.dir/lsx-vfclass_d.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_d.dir/lsx-vfclass_d.s
index a998ff00..176828ad 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_d.dir/lsx-vfclass_d.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_d.dir/lsx-vfclass_d.s
@@ -245,9 +245,7 @@ main: # @main
 st.d $fp, $sp, 80 # 8-byte Folded Spill
 vrepli.d $vr0, 256
 vst $vr0, $sp, 48
- lu12i.w $a0, 31
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -2303
 vfclass.d $vr0, $vr0
 vst $vr0, $sp, 64
 pcalau12i $a0, %pc_hi20(.L.str.5)
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_s.dir/lsx-vfclass_s.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_s.dir/lsx-vfclass_s.s
index 840645e9..16466ecd 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_s.dir/lsx-vfclass_s.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfclass_s.dir/lsx-vfclass_s.s
@@ -255,32 +255,32 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -96
- st.d $ra, $sp, 88 # 8-byte Folded Spill
- st.d $fp, $sp, 80 # 8-byte Folded Spill
+ addi.d $sp, $sp, -80
+ st.d $ra, $sp, 72 # 8-byte Folded Spill
+ st.d $fp, $sp, 64 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr1, $a0, %pc_lo12(.LCPI2_1)
- vst $vr0, $sp, 48
+ vst $vr0, $sp, 32
 vfclass.s $vr0, $vr1
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
- addi.d $a0, $sp, 48
- addi.d $a1, $sp, 64
+ addi.d $a0, $sp, 32
+ addi.d $a1, $sp, 48
 ori $a2, $zero, 16
 ori $a4, $zero, 22
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 vrepli.w $vr0, 8
- vst $vr0, $sp, 48
+ vst $vr0, $sp, 32
 vrepli.h $vr0, -510
 vfclass.s $vr0, $vr0
- vst $vr0, $sp, 64
- addi.d $a0, $sp, 48
- addi.d $a1, $sp, 64
+ vst $vr0, $sp, 48
+ addi.d $a0, $sp, 32
+ addi.d $a1, $sp, 48
 ori $a2, $zero, 16
 ori $a4, $zero,
27 move $a3, $fp @@ -290,37 +290,35 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_2) pcalau12i $a0, %pc_hi20(.LCPI2_3) vld $vr1, $a0, %pc_lo12(.LCPI2_3) - vst $vr0, $sp, 48 + vst $vr0, $sp, 32 vfclass.s $vr0, $vr1 - vst $vr0, $sp, 64 - addi.d $a0, $sp, 48 - addi.d $a1, $sp, 64 + vst $vr0, $sp, 48 + addi.d $a0, $sp, 32 + addi.d $a1, $sp, 48 ori $a2, $zero, 16 ori $a4, $zero, 32 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 512 - vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 32 # 16-byte Folded Spill - vst $vr0, $sp, 48 + vldi $vr0, -3838 + vst $vr0, $sp, 32 vrepli.b $vr0, 0 vfclass.s $vr0, $vr0 vst $vr0, $sp, 16 # 16-byte Folded Spill - vst $vr0, $sp, 64 - addi.d $a0, $sp, 48 - addi.d $a1, $sp, 64 + vst $vr0, $sp, 48 + addi.d $a0, $sp, 32 + addi.d $a1, $sp, 48 ori $a2, $zero, 16 ori $a4, $zero, 37 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 32 # 16-byte Folded Reload - vst $vr0, $sp, 48 + vldi $vr0, -3838 + vst $vr0, $sp, 32 vld $vr0, $sp, 16 # 16-byte Folded Reload - vst $vr0, $sp, 64 - addi.d $a0, $sp, 48 - addi.d $a1, $sp, 64 + vst $vr0, $sp, 48 + addi.d $a0, $sp, 32 + addi.d $a1, $sp, 48 ori $a2, $zero, 16 ori $a4, $zero, 42 move $a3, $fp @@ -329,20 +327,20 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_4) vld $vr0, $a0, %pc_lo12(.LCPI2_4) vrepli.w $vr1, 128 - vst $vr1, $sp, 48 + vst $vr1, $sp, 32 vfclass.s $vr0, $vr0 - vst $vr0, $sp, 64 - addi.d $a0, $sp, 48 - addi.d $a1, $sp, 64 + vst $vr0, $sp, 48 + addi.d $a0, $sp, 32 + addi.d $a1, $sp, 48 ori $a2, $zero, 16 ori $a4, $zero, 47 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $fp, $sp, 80 # 8-byte Folded Reload - ld.d $ra, $sp, 88 # 8-byte Folded Reload - addi.d $sp, $sp, 96 + ld.d $fp, $sp, 64 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + addi.d $sp, $sp, 80 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_ceq.dir/lsx-vfcmp_ceq.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_ceq.dir/lsx-vfcmp_ceq.s index 61edd6a7..1c5b6c42 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_ceq.dir/lsx-vfcmp_ceq.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_ceq.dir/lsx-vfcmp_ceq.s @@ -372,10 +372,9 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill + addi.d $sp, $sp, -160 + st.d $ra, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 144 # 8-byte Folded Spill vrepli.b $vr0, -1 vst $vr0, $sp, 80 # 16-byte Folded Spill vst $vr0, $sp, 112 @@ -611,15 +610,9 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $sp, 112 - ori $s0, $zero, 0 - ori $a0, $zero, 0 - lu32i.d $a0, 255 - vreplgr2vr.d $vr0, $a0 - ori $a0, $zero, 0 - lu32i.d $a0, -256 - lu52i.d $a0, $a0, 15 - vreplgr2vr.d $vr1, $a0 - vfcmp.ceq.d $vr0, $vr0, $vr1 + vldi $vr0, -1696 + vldi $vr1, -1776 + vfcmp.ceq.d $vr0, $vr1, $vr0 vst $vr0, $sp, 128 addi.d $a0, $sp, 112 addi.d $a1, $sp, 128 @@ -696,8 +689,7 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_19) pcalau12i $a0, %pc_hi20(.LCPI2_20) vld $vr1, $a0, %pc_lo12(.LCPI2_20) - lu32i.d $s0, -1 - vreplgr2vr.d $vr2, $s0 + vldi $vr2, -1552 vst $vr2, $sp, 112 vfcmp.cueq.s $vr0, $vr1, $vr0 vst $vr0, $sp, 128 @@ 
-818,9 +810,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr1, $sp, 96 # 16-byte Folded Reload vst $vr1, $sp, 112 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -2305 vfcmp.cueq.d $vr0, $vr0, $vr1 vst $vr0, $sp, 128 addi.d $a0, $sp, 112 @@ -976,10 +966,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 152 # 8-byte Folded Reload + addi.d $sp, $sp, 160 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cle.dir/lsx-vfcmp_cle.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cle.dir/lsx-vfcmp_cle.s index 186d79f8..36d5b932 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cle.dir/lsx-vfcmp_cle.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cle.dir/lsx-vfcmp_cle.s @@ -409,22 +409,21 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill + addi.d $sp, $sp, -144 + st.d $ra, $sp, 136 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill vrepli.b $vr0, -1 - vst $vr0, $sp, 96 # 16-byte Folded Spill - vst $vr0, $sp, 112 - vrepli.b $vr0, 0 vst $vr0, $sp, 80 # 16-byte Folded Spill + vst $vr0, $sp, 96 + vrepli.b $vr0, 0 + vst $vr0, $sp, 64 # 16-byte Folded Spill vfcmp.cle.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 48 # 16-byte Folded Spill - vst $vr0, $sp, 128 + vst $vr0, $sp, 32 # 16-byte Folded Spill + vst $vr0, $sp, 112 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $fp, $a0, %pc_lo12(.L.str.5) - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 23 move $a3, $fp @@ -434,12 +433,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_0) pcalau12i $a0, %pc_hi20(.LCPI2_1) vld $vr1, $a0, %pc_lo12(.LCPI2_1) - vld $vr2, $sp, 80 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 64 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 29 move $a3, $fp @@ -447,13 +446,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_2) vld $vr0, $a0, %pc_lo12(.LCPI2_2) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 35 move $a3, $fp @@ -461,15 +460,15 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_3) vld $vr1, $a0, %pc_lo12(.LCPI2_3) - vst $vr1, $sp, 32 # 16-byte Folded Spill + vst $vr1, $sp, 16 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_4) vld $vr0, $a0, %pc_lo12(.LCPI2_4) - vst $vr1, $sp, 112 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 
112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 41 move $a3, $fp @@ -479,14 +478,14 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_5) pcalau12i $a0, %pc_hi20(.LCPI2_6) vld $vr2, $a0, %pc_lo12(.LCPI2_6) - vst $vr2, $sp, 64 # 16-byte Folded Spill + vst $vr2, $sp, 48 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_7) vld $vr1, $a0, %pc_lo12(.LCPI2_7) - vst $vr0, $sp, 112 + vst $vr0, $sp, 96 vfcmp.cle.s $vr0, $vr1, $vr2 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 47 move $a3, $fp @@ -496,12 +495,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_8) pcalau12i $a0, %pc_hi20(.LCPI2_9) vld $vr1, $a0, %pc_lo12(.LCPI2_9) - vld $vr2, $sp, 64 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 48 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 53 move $a3, $fp @@ -511,12 +510,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_10) pcalau12i $a0, %pc_hi20(.LCPI2_11) vld $vr1, $a0, %pc_lo12(.LCPI2_11) - vld $vr2, $sp, 32 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 16 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 59 move $a3, $fp @@ -524,13 +523,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_12) vld $vr0, $a0, %pc_lo12(.LCPI2_12) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 65 move $a3, $fp @@ -538,41 +537,37 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_13) vld $vr0, $a0, %pc_lo12(.LCPI2_13) - vld $vr1, $sp, 64 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 48 # 16-byte Folded Reload + vst $vr1, $sp, 96 vfcmp.cle.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 71 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $s0, $zero, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -1 - pcalau12i $a1, %pc_hi20(.LCPI2_14) - vld $vr0, $a1, %pc_lo12(.LCPI2_14) - vreplgr2vr.d $vr1, $a0 - vst $vr1, $sp, 16 # 16-byte Folded Spill - vst $vr1, $sp, 112 + pcalau12i $a0, %pc_hi20(.LCPI2_14) + vld $vr0, $a0, %pc_lo12(.LCPI2_14) + vldi $vr1, -1552 + vst $vr1, $sp, 96 vrepli.w $vr1, 1 vfcmp.cle.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 77 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 32 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 48 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d 
$a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 83 move $a3, $fp @@ -582,12 +577,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_15) pcalau12i $a0, %pc_hi20(.LCPI2_16) vld $vr1, $a0, %pc_lo12(.LCPI2_16) - vld $vr2, $sp, 96 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 80 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 89 move $a3, $fp @@ -597,12 +592,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_17) pcalau12i $a0, %pc_hi20(.LCPI2_18) vld $vr1, $a0, %pc_lo12(.LCPI2_18) - vld $vr2, $sp, 16 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vldi $vr2, -1552 + vst $vr2, $sp, 96 vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 95 move $a3, $fp @@ -612,25 +607,25 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_19) pcalau12i $a0, %pc_hi20(.LCPI2_20) vld $vr1, $a0, %pc_lo12(.LCPI2_20) - vst $vr0, $sp, 112 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 48 # 16-byte Folded Reload vfcmp.cle.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 101 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 112 vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload vfcmp.cle.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 48 # 16-byte Folded Spill - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 32 # 16-byte Folded Spill + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 107 move $a3, $fp @@ -638,13 +633,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_21) vld $vr0, $a0, %pc_lo12(.LCPI2_21) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cle.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 113 move $a3, $fp @@ -652,62 +647,62 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_22) vld $vr0, $a0, %pc_lo12(.LCPI2_22) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 vfcmp.cle.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 119 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 32 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 48 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 125 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 
- vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 32 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 48 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 131 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 96 ori $a0, $zero, 2056 vreplgr2vr.w $vr0, $a0 lu12i.w $a0, -522240 ori $a0, $a0, 1 vreplgr2vr.d $vr1, $a0 vfcmp.cle.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 137 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr0, $sp, 80 # 16-byte Folded Reload vfcmp.cle.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 143 move $a3, $fp @@ -715,13 +710,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_23) vld $vr0, $a0, %pc_lo12(.LCPI2_23) + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 - vld $vr1, $sp, 96 # 16-byte Folded Reload vfcmp.cle.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 149 move $a3, $fp @@ -729,24 +724,24 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_24) vld $vr0, $a0, %pc_lo12(.LCPI2_24) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 vfcmp.cle.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 155 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr0, $sp, 80 # 16-byte Folded Reload vfcmp.cle.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 161 move $a3, $fp @@ -754,13 +749,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_25) vld $vr0, $a0, %pc_lo12(.LCPI2_25) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cle.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 167 move $a3, $fp @@ -770,12 +765,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_26) pcalau12i $a0, %pc_hi20(.LCPI2_27) vld $vr1, $a0, %pc_lo12(.LCPI2_27) - vld $vr2, $sp, 64 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld 
$vr2, $sp, 48 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cle.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 173 move $a3, $fp @@ -785,37 +780,37 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_28) pcalau12i $a0, %pc_hi20(.LCPI2_29) vld $vr1, $a0, %pc_lo12(.LCPI2_29) - vld $vr2, $sp, 80 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 64 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cle.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 179 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 32 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 48 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 185 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 lu12i.w $a0, -51809 ori $a0, $a0, 856 vreplgr2vr.d $vr0, $a0 vfcmp.cle.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 191 move $a3, $fp @@ -823,25 +818,25 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_30) vld $vr0, $a0, %pc_lo12(.LCPI2_30) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 vrepli.b $vr1, 99 vfcmp.cle.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 197 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 vrepli.d $vr0, 1 vfcmp.cule.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 203 move $a3, $fp @@ -849,26 +844,26 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_31) vld $vr1, $a0, %pc_lo12(.LCPI2_31) - vst $vr1, $sp, 64 # 16-byte Folded Spill - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vst $vr1, $sp, 48 # 16-byte Folded Spill vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload vfcmp.cule.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 209 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 112 vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload vfcmp.cule.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, 
$zero, 16 ori $a4, $zero, 215 move $a3, $fp @@ -878,12 +873,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_32) pcalau12i $a0, %pc_hi20(.LCPI2_33) vld $vr1, $a0, %pc_lo12(.LCPI2_33) - vld $vr2, $sp, 96 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 80 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cule.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 221 move $a3, $fp @@ -893,12 +888,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_34) pcalau12i $a0, %pc_hi20(.LCPI2_35) vld $vr1, $a0, %pc_lo12(.LCPI2_35) - vld $vr2, $sp, 64 # 16-byte Folded Reload - vst $vr2, $sp, 112 + vld $vr2, $sp, 48 # 16-byte Folded Reload + vst $vr2, $sp, 96 vfcmp.cule.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 227 move $a3, $fp @@ -908,24 +903,24 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_36) pcalau12i $a0, %pc_hi20(.LCPI2_37) vld $vr1, $a0, %pc_lo12(.LCPI2_37) - vst $vr0, $sp, 112 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload vfcmp.cule.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 233 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 96 vrepli.b $vr0, -100 vfcmp.cule.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 239 move $a3, $fp @@ -933,12 +928,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_38) vld $vr0, $a0, %pc_lo12(.LCPI2_38) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 + vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 vfcmp.cule.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 245 move $a3, $fp @@ -946,112 +941,112 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_39) vld $vr0, $a0, %pc_lo12(.LCPI2_39) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cule.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 251 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 112 - lu32i.d $s0, -524288 - lu52i.d $a0, $s0, 2047 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vfcmp.cule.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 257 move $a3, $fp pcaddu18i $ra, 
%call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 32 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 16 # 16-byte Folded Reload + vst $vr0, $sp, 96 pcalau12i $a0, %pc_hi20(.LCPI2_40) vld $vr0, $a0, %pc_lo12(.LCPI2_40) lu12i.w $a0, 14 ori $a0, $a0, 3578 vreplgr2vr.h $vr1, $a0 vfcmp.cule.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 263 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 112 vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload vfcmp.cule.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 269 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 112 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload vfcmp.cule.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + vst $vr0, $sp, 112 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 275 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 281 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 287 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 96 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 112 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 128 - addi.d $a0, $sp, 112 - addi.d $a1, $sp, 128 + addi.d $a0, $sp, 96 + addi.d $a1, $sp, 112 ori $a2, $zero, 16 ori $a4, $zero, 293 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $ra, $sp, 136 # 8-byte Folded Reload + addi.d $sp, $sp, 144 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_clt.dir/lsx-vfcmp_clt.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_clt.dir/lsx-vfcmp_clt.s index 5f325895..7387dc57 100644 --- 
a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_clt.dir/lsx-vfcmp_clt.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_clt.dir/lsx-vfcmp_clt.s
@@ -596,10 +596,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 80 # 16-byte Folded Reload
 vst $vr0, $sp, 96
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
- vst $vr0, $sp, 32 # 16-byte Folded Spill
+ vldi $vr0, -1777
 vfcmp.clt.s $vr0, $vr0, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
@@ -627,7 +624,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 vld $vr2, $a0, %pc_lo12(.LCPI2_16)
- vst $vr2, $sp, 16 # 16-byte Folded Spill
+ vst $vr2, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_17)
 vld $vr0, $a0, %pc_lo12(.LCPI2_17)
 pcalau12i $a0, %pc_hi20(.LCPI2_18)
@@ -646,7 +643,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_19)
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
 vld $vr1, $a0, %pc_lo12(.LCPI2_20)
- vld $vr2, $sp, 16 # 16-byte Folded Reload
+ vld $vr2, $sp, 32 # 16-byte Folded Reload
 vst $vr2, $sp, 96
 vfcmp.clt.s $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
@@ -704,7 +701,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_27)
 vld $vr2, $a0, %pc_lo12(.LCPI2_27)
- vst $vr2, $sp, 16 # 16-byte Folded Spill
+ vst $vr2, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_28)
 vld $vr0, $a0, %pc_lo12(.LCPI2_28)
 pcalau12i $a0, %pc_hi20(.LCPI2_29)
@@ -822,7 +819,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_40)
 vld $vr0, $a0, %pc_lo12(.LCPI2_40)
- vld $vr1, $sp, 32 # 16-byte Folded Reload
+ vldi $vr1, -1777
 vst $vr1, $sp, 96
 vfcmp.cult.s $vr0, $vr0, $vr0
 vst $vr0, $sp, 112
@@ -850,7 +847,7 @@ main: # @main
 vst $vr0, $sp, 96
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vfcmp.cult.s $vr0, $vr0, $vr1
- vst $vr0, $sp, 32 # 16-byte Folded Spill
+ vst $vr0, $sp, 16 # 16-byte Folded Spill
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 112
@@ -891,7 +888,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 64 # 16-byte Folded Reload
 vst $vr0, $sp, 96
- vld $vr0, $sp, 32 # 16-byte Folded Reload
+ vld $vr0, $sp, 16 # 16-byte Folded Reload
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 112
@@ -930,7 +927,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_48)
 pcalau12i $a0, %pc_hi20(.LCPI2_49)
 vld $vr1, $a0, %pc_lo12(.LCPI2_49)
- vld $vr2, $sp, 16 # 16-byte Folded Reload
+ vld $vr2, $sp, 32 # 16-byte Folded Reload
 vst $vr2, $sp, 96
 vfcmp.cult.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cor.dir/lsx-vfcmp_cor.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cor.dir/lsx-vfcmp_cor.s
index c1c096dc..4aadd8a4 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cor.dir/lsx-vfcmp_cor.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cor.dir/lsx-vfcmp_cor.s
@@ -401,8 +401,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 48 # 16-byte Folded Reload
 vst $vr0, $sp, 64
- lu12i.w $a0, 260096
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -1424
 vfcmp.cor.d $vr0, $vr0, $vr0
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cun.dir/lsx-vfcmp_cun.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cun.dir/lsx-vfcmp_cun.s
index 9d6feb88..8839213f 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cun.dir/lsx-vfcmp_cun.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_cun.dir/lsx-vfcmp_cun.s
@@ -417,14 +417,13 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
+ vldi $vr0, -1777
+ vst $vr0, $sp, 80
+ pcalau12i $a0, %pc_hi20(.LCPI2_7)
+ vld $vr0, $a0, %pc_lo12(.LCPI2_7)
 addi.w $a0, $zero, -1
- lu52i.d $a1, $a0, -17
- lu32i.d $a0, 0
- pcalau12i $a2, %pc_hi20(.LCPI2_7)
- vld $vr0, $a2, %pc_lo12(.LCPI2_7)
+ lu52i.d $a0, $a0, -17
 vreplgr2vr.d $vr1, $a0
- vst $vr1, $sp, 80
- vreplgr2vr.d $vr1, $a1
 vfcmp.cun.s $vr0, $vr1, $vr0
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_seq.dir/lsx-vfcmp_seq.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_seq.dir/lsx-vfcmp_seq.s
index f2912878..04a771c2 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_seq.dir/lsx-vfcmp_seq.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_seq.dir/lsx-vfcmp_seq.s
@@ -653,9 +653,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1552
 vst $vr0, $sp, 96
 vrepli.d $vr0, 64
 vld $vr1, $sp, 80 # 16-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sle.dir/lsx-vfcmp_sle.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sle.dir/lsx-vfcmp_sle.s
index 54eab7ad..5eae92d8 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sle.dir/lsx-vfcmp_sle.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sle.dir/lsx-vfcmp_sle.s
@@ -569,13 +569,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 64 # 16-byte Folded Reload
- vst $vr0, $sp, 80
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 vld $vr0, $a0, %pc_lo12(.LCPI2_16)
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr1, $a0
+ vld $vr1, $sp, 64 # 16-byte Folded Reload
+ vst $vr1, $sp, 80
+ vldi $vr1, -1777
 vfcmp.sle.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_slt.dir/lsx-vfcmp_slt.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_slt.dir/lsx-vfcmp_slt.s
index 5beba91f..72b2584d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_slt.dir/lsx-vfcmp_slt.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_slt.dir/lsx-vfcmp_slt.s
@@ -387,37 +387,36 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -240
- st.d $ra, $sp, 232 # 8-byte Folded Spill
- st.d $fp, $sp, 224 # 8-byte Folded Spill
- st.d $s0, $sp, 216 # 8-byte Folded Spill
+ addi.d $sp, $sp, -208
+ st.d $ra, $sp, 200 # 8-byte Folded Spill
+ st.d $fp, $sp, 192 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr1, $a0, %pc_lo12(.LCPI2_1)
- vst $vr0, $sp, 176
+ vst $vr0, $sp, 160
 vrepli.b $vr0, 0
- vst $vr0, $sp, 160 # 16-byte Folded Spill
+ vst $vr0, $sp, 144 # 16-byte Folded Spill
 vfcmp.slt.s $vr0, $vr0, $vr1
- vst $vr0, $sp, 192
+ vst $vr0, $sp, 176
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ addi.d $a0, $sp, 160
+ addi.d $a1,
$sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 23 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 lu12i.w $a0, 522240 vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 144 # 16-byte Folded Spill + vst $vr0, $sp, 128 # 16-byte Folded Spill vfcmp.slt.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 29 move $a3, $fp @@ -425,12 +424,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_2) vld $vr0, $a0, %pc_lo12(.LCPI2_2) - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 vfcmp.slt.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 35 move $a3, $fp @@ -438,12 +437,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_3) vld $vr0, $a0, %pc_lo12(.LCPI2_3) - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 vfcmp.slt.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 41 move $a3, $fp @@ -456,11 +455,11 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_6) vld $vr2, $a0, %pc_lo12(.LCPI2_6) vst $vr2, $sp, 32 # 16-byte Folded Spill - vst $vr0, $sp, 176 + vst $vr0, $sp, 160 vfcmp.slt.s $vr0, $vr2, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 47 move $a3, $fp @@ -472,11 +471,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_8) pcalau12i $a0, %pc_hi20(.LCPI2_9) vld $vr2, $a0, %pc_lo12(.LCPI2_9) - vst $vr0, $sp, 176 + vst $vr0, $sp, 160 vfcmp.slt.s $vr0, $vr2, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 53 move $a3, $fp @@ -484,44 +483,41 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_10) vld $vr1, $a0, %pc_lo12(.LCPI2_10) - vst $vr1, $sp, 80 # 16-byte Folded Spill + vst $vr1, $sp, 64 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_11) vld $vr0, $a0, %pc_lo12(.LCPI2_11) - vld $vr2, $sp, 160 # 16-byte Folded Reload - vst $vr2, $sp, 176 + vld $vr2, $sp, 144 # 16-byte Folded Reload + vst $vr2, $sp, 160 vfcmp.slt.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 59 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 - vld $vr0, $sp, 144 # 16-byte Folded Reload + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vld $vr0, $sp, 128 # 16-byte Folded Reload vfcmp.slt.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 65 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $s0, $zero, 0 - lu32i.d $s0, -1 - vreplgr2vr.d 
$vr0, $s0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 176 + vldi $vr0, -1552 + vst $vr0, $sp, 160 lu52i.d $a0, $zero, -8 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 160 # 16-byte Folded Reload + vld $vr1, $sp, 144 # 16-byte Folded Reload vfcmp.slt.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 71 move $a3, $fp @@ -530,25 +526,25 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_12) vld $vr0, $a0, %pc_lo12(.LCPI2_12) vst $vr0, $sp, 16 # 16-byte Folded Spill - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 vfcmp.slt.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 77 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 vrepli.b $vr0, -1 - vst $vr0, $sp, 144 # 16-byte Folded Spill + vst $vr0, $sp, 128 # 16-byte Folded Spill vfcmp.slt.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 83 move $a3, $fp @@ -558,24 +554,24 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_13) pcalau12i $a0, %pc_hi20(.LCPI2_14) vld $vr1, $a0, %pc_lo12(.LCPI2_14) - vld $vr2, $sp, 144 # 16-byte Folded Reload - vst $vr2, $sp, 176 + vld $vr2, $sp, 128 # 16-byte Folded Reload + vst $vr2, $sp, 160 vfcmp.slt.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 89 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 vfcmp.slt.d $vr1, $vr0, $vr0 - vst $vr1, $sp, 128 # 16-byte Folded Spill - vst $vr1, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr1, $sp, 112 # 16-byte Folded Spill + vst $vr1, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 95 move $a3, $fp @@ -583,31 +579,31 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_15) vld $vr1, $a0, %pc_lo12(.LCPI2_15) - vst $vr1, $sp, 96 # 16-byte Folded Spill + vst $vr1, $sp, 80 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_16) vld $vr0, $a0, %pc_lo12(.LCPI2_16) - vst $vr1, $sp, 176 - vld $vr1, $sp, 160 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vld $vr1, $sp, 144 # 16-byte Folded Reload vfcmp.slt.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 101 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 pcalau12i $a0, %pc_hi20(.LCPI2_17) vld $vr0, $a0, %pc_lo12(.LCPI2_17) lu12i.w $a0, -5 ori $a0, $a0, 1279 vreplgr2vr.d $vr1, $a0 vfcmp.slt.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori 
$a2, $zero, 16 ori $a4, $zero, 107 move $a3, $fp @@ -616,26 +612,28 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_18) vld $vr0, $a0, %pc_lo12(.LCPI2_18) vst $vr0, $sp, 48 # 16-byte Folded Spill - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 vfcmp.slt.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 113 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 144 # 16-byte Folded Reload - vst $vr0, $sp, 176 - lu52i.d $a0, $s0, 3 + vld $vr0, $sp, 128 # 16-byte Folded Reload + vst $vr0, $sp, 160 + ori $a0, $zero, 0 + lu32i.d $a0, -1 + lu52i.d $a0, $a0, 3 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 160 # 16-byte Folded Reload + vld $vr1, $sp, 144 # 16-byte Folded Reload vfcmp.slt.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 119 move $a3, $fp @@ -643,13 +641,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_19) vld $vr0, $a0, %pc_lo12(.LCPI2_19) + vld $vr1, $sp, 128 # 16-byte Folded Reload + vst $vr1, $sp, 160 vld $vr1, $sp, 144 # 16-byte Folded Reload - vst $vr1, $sp, 176 - vld $vr1, $sp, 160 # 16-byte Folded Reload vfcmp.slt.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 125 move $a3, $fp @@ -657,24 +655,24 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_20) vld $vr0, $a0, %pc_lo12(.LCPI2_20) - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 - vld $vr1, $sp, 96 # 16-byte Folded Reload + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vld $vr1, $sp, 80 # 16-byte Folded Reload vfcmp.slt.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 131 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 + vld $vr0, $sp, 112 # 16-byte Folded Reload vst $vr0, $sp, 176 - vld $vr0, $sp, 128 # 16-byte Folded Reload - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 137 move $a3, $fp @@ -682,13 +680,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_21) vld $vr0, $a0, %pc_lo12(.LCPI2_21) - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vld $vr1, $sp, 128 # 16-byte Folded Reload vfcmp.slt.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 143 move $a3, $fp @@ -696,24 +694,24 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_22) vld $vr0, $a0, %pc_lo12(.LCPI2_22) - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 vfcmp.slt.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d 
$a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 149 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 - vld $vr0, $sp, 144 # 16-byte Folded Reload + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vld $vr0, $sp, 128 # 16-byte Folded Reload vfcmp.slt.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 155 move $a3, $fp @@ -721,37 +719,37 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_23) vld $vr1, $a0, %pc_lo12(.LCPI2_23) - vst $vr1, $sp, 128 # 16-byte Folded Spill + vst $vr1, $sp, 112 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_24) vld $vr0, $a0, %pc_lo12(.LCPI2_24) - vst $vr1, $sp, 176 + vst $vr1, $sp, 160 vfcmp.sult.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 161 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 vfcmp.sult.s $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 96 # 16-byte Folded Spill + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 167 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $sp, 176 - vld $vr0, $sp, 112 # 16-byte Folded Reload - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 173 move $a3, $fp @@ -762,11 +760,11 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_26) vld $vr1, $a0, %pc_lo12(.LCPI2_26) vld $vr2, $sp, 16 # 16-byte Folded Reload - vst $vr2, $sp, 176 + vst $vr2, $sp, 160 vfcmp.sult.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 179 move $a3, $fp @@ -777,22 +775,22 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_28) vld $vr1, $a0, %pc_lo12(.LCPI2_28) vld $vr2, $sp, 32 # 16-byte Folded Reload - vst $vr2, $sp, 176 + vst $vr2, $sp, 160 vfcmp.sult.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 185 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $sp, 176 - vld $vr0, $sp, 112 # 16-byte Folded Reload - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 191 move $a3, $fp @@ -800,35 +798,35 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_29) vld $vr0, $a0, %pc_lo12(.LCPI2_29) - vld $vr1, $sp, 144 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 128 # 16-byte Folded Reload + vst $vr1, 
$sp, 160 vrepli.d $vr1, -2 vfcmp.sult.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 197 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $sp, 176 - vld $vr0, $sp, 112 # 16-byte Folded Reload - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 203 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $sp, 176 - vld $vr0, $sp, 112 # 16-byte Folded Reload - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 209 move $a3, $fp @@ -836,37 +834,37 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_30) vld $vr0, $a0, %pc_lo12(.LCPI2_30) - vld $vr1, $sp, 128 # 16-byte Folded Reload - vst $vr1, $sp, 176 - vld $vr1, $sp, 160 # 16-byte Folded Reload + vld $vr1, $sp, 112 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vld $vr1, $sp, 144 # 16-byte Folded Reload vfcmp.sult.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 215 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 160 # 16-byte Folded Reload - vst $vr1, $sp, 176 + vld $vr1, $sp, 144 # 16-byte Folded Reload + vst $vr1, $sp, 160 vrepli.h $vr0, 1 vfcmp.sult.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 221 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 144 # 16-byte Folded Reload - vst $vr1, $sp, 176 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vld $vr1, $sp, 128 # 16-byte Folded Reload + vst $vr1, $sp, 160 + vldi $vr0, -1552 vfcmp.sult.s $vr0, $vr1, $vr0 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 227 move $a3, $fp @@ -876,24 +874,24 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_31) pcalau12i $a0, %pc_hi20(.LCPI2_32) vld $vr1, $a0, %pc_lo12(.LCPI2_32) - vst $vr0, $sp, 176 - vld $vr0, $sp, 160 # 16-byte Folded Reload + vst $vr0, $sp, 160 + vld $vr0, $sp, 144 # 16-byte Folded Reload vfcmp.sult.s $vr0, $vr0, $vr1 - vst $vr0, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr0, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 233 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vld $vr0, $sp, 144 # 16-byte Folded Reload + vst $vr0, $sp, 160 vfcmp.sult.d $vr1, $vr0, $vr0 - vst $vr1, $sp, 112 # 16-byte Folded Spill - vst $vr1, $sp, 192 - addi.d $a0, $sp, 176 - addi.d $a1, $sp, 192 + vst $vr1, $sp, 96 # 16-byte Folded Spill + vst $vr1, $sp, 176 + addi.d $a0, $sp, 160 + addi.d $a1, $sp, 176 ori $a2, $zero, 16 ori $a4, $zero, 239 move $a3, $fp 
@@ -901,13 +899,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_33)
 vld $vr0, $a0, %pc_lo12(.LCPI2_33)
- vld $vr1, $sp, 128 # 16-byte Folded Reload
- vst $vr1, $sp, 176
- vld $vr1, $sp, 160 # 16-byte Folded Reload
+ vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vst $vr1, $sp, 160
+ vld $vr1, $sp, 144 # 16-byte Folded Reload
 vfcmp.sult.d $vr0, $vr0, $vr1
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ vst $vr0, $sp, 176
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 245
 move $a3, $fp
@@ -915,13 +913,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_34)
 vld $vr0, $a0, %pc_lo12(.LCPI2_34)
- vld $vr1, $sp, 144 # 16-byte Folded Reload
- vst $vr1, $sp, 176
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr1, $sp, 128 # 16-byte Folded Reload
+ vst $vr1, $sp, 160
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vfcmp.sult.d $vr0, $vr0, $vr1
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ vst $vr0, $sp, 176
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 251
 move $a3, $fp
@@ -931,24 +929,24 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_35)
 pcalau12i $a0, %pc_hi20(.LCPI2_36)
 vld $vr1, $a0, %pc_lo12(.LCPI2_36)
- vld $vr2, $sp, 128 # 16-byte Folded Reload
- vst $vr2, $sp, 176
+ vld $vr2, $sp, 112 # 16-byte Folded Reload
+ vst $vr2, $sp, 160
 vfcmp.sult.d $vr0, $vr1, $vr0
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ vst $vr0, $sp, 176
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 257
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr1, $sp, 160 # 16-byte Folded Reload
- vst $vr1, $sp, 176
+ vld $vr1, $sp, 144 # 16-byte Folded Reload
+ vst $vr1, $sp, 160
 vld $vr0, $sp, 48 # 16-byte Folded Reload
 vfcmp.sult.d $vr0, $vr0, $vr1
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ vst $vr0, $sp, 176
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 263
 move $a3, $fp
@@ -958,46 +956,45 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_37)
 pcalau12i $a0, %pc_hi20(.LCPI2_38)
 vld $vr1, $a0, %pc_lo12(.LCPI2_38)
- vld $vr2, $sp, 144 # 16-byte Folded Reload
- vst $vr2, $sp, 176
+ vld $vr2, $sp, 128 # 16-byte Folded Reload
+ vst $vr2, $sp, 160
 vfcmp.sult.d $vr0, $vr1, $vr0
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ vst $vr0, $sp, 176
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 269
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 128 # 16-byte Folded Reload
- vst $vr0, $sp, 176
- vld $vr0, $sp, 80 # 16-byte Folded Reload
- vld $vr1, $sp, 160 # 16-byte Folded Reload
+ vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 160
+ vld $vr0, $sp, 64 # 16-byte Folded Reload
+ vld $vr1, $sp, 144 # 16-byte Folded Reload
 vfcmp.sult.d $vr0, $vr0, $vr1
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ vst $vr0, $sp, 176
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 275
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 160 # 16-byte Folded Reload
+ vld $vr0, $sp, 144 # 16-byte Folded Reload
+ vst $vr0, $sp, 160
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vst $vr0, $sp, 176
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 192
- addi.d $a0, $sp, 176
- addi.d $a1, $sp, 192
+ addi.d $a0, $sp, 160
+ addi.d $a1, $sp, 176
 ori $a2, $zero, 16
 ori $a4, $zero, 281
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 216 # 8-byte Folded Reload
- ld.d $fp, $sp, 224 # 8-byte Folded Reload
- ld.d $ra, $sp, 232 # 8-byte Folded Reload
- addi.d $sp, $sp, 240
+ ld.d $fp, $sp, 192 # 8-byte Folded Reload
+ ld.d $ra, $sp, 200 # 8-byte Folded Reload
+ addi.d $sp, $sp, 208
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sne.dir/lsx-vfcmp_sne.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sne.dir/lsx-vfcmp_sne.s
index 738ae461..a3b5ead2 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sne.dir/lsx-vfcmp_sne.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sne.dir/lsx-vfcmp_sne.s
@@ -360,18 +360,14 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -160
- st.d $ra, $sp, 152 # 8-byte Folded Spill
- st.d $fp, $sp, 144 # 8-byte Folded Spill
- st.d $s0, $sp, 136 # 8-byte Folded Spill
- ori $a0, $zero, 0
- pcalau12i $a1, %pc_hi20(.LCPI2_0)
- vld $vr0, $a1, %pc_lo12(.LCPI2_0)
- pcalau12i $a1, %pc_hi20(.LCPI2_1)
- vld $vr1, $a1, %pc_lo12(.LCPI2_1)
- lu32i.d $a0, -1
- vreplgr2vr.d $vr2, $a0
- vst $vr2, $sp, 32 # 16-byte Folded Spill
+ addi.d $sp, $sp, -144
+ st.d $ra, $sp, 136 # 8-byte Folded Spill
+ st.d $fp, $sp, 128 # 8-byte Folded Spill
+ pcalau12i $a0, %pc_hi20(.LCPI2_0)
+ vld $vr0, $a0, %pc_lo12(.LCPI2_0)
+ pcalau12i $a0, %pc_hi20(.LCPI2_1)
+ vld $vr1, $a0, %pc_lo12(.LCPI2_1)
+ vldi $vr2, -1552
 vst $vr2, $sp, 96
 vfcmp.sne.s $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
@@ -386,7 +382,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_2)
 vld $vr1, $a0, %pc_lo12(.LCPI2_2)
- vst $vr1, $sp, 48 # 16-byte Folded Spill
+ vst $vr1, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_3)
 vld $vr0, $a0, %pc_lo12(.LCPI2_3)
 vst $vr1, $sp, 96
@@ -403,7 +399,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 vld $vr2, $a0, %pc_lo12(.LCPI2_4)
- vst $vr2, $sp, 16 # 16-byte Folded Spill
+ vst $vr2, $sp, 48 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
 vld $vr0, $a0, %pc_lo12(.LCPI2_5)
 vrepli.b $vr1, -1
@@ -420,9 +416,9 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
 vld $vr0, $a0, %pc_lo12(.LCPI2_6)
- vld $vr1, $sp, 32 # 16-byte Folded Reload
+ vldi $vr1, -1552
 vst $vr1, $sp, 96
- vld $vr1, $sp, 16 # 16-byte Folded Reload
+ vld $vr1, $sp, 48 # 16-byte Folded Reload
 vfcmp.sne.s $vr0, $vr0, $vr1
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
@@ -461,7 +457,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_8)
 vld $vr0, $a0, %pc_lo12(.LCPI2_8)
- vst $vr0, $sp, 32 # 16-byte Folded Spill
+ vst $vr0, $sp, 48 # 16-byte Folded Spill
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vst $vr1, $sp, 96
 vfcmp.sne.s $vr0, $vr0, $vr1
@@ -565,7 +561,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_15)
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 vld $vr1, $a0, %pc_lo12(.LCPI2_16)
- vld $vr2, $sp, 48 # 16-byte Folded Reload
+ vld $vr2, $sp, 32 # 16-byte Folded Reload
 vst $vr2, $sp, 96
 vfcmp.sne.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
@@ -620,8 +616,8 @@ main: # @main
 vst $vr0, $sp, 96
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
 vld $vr0, $a0, %pc_lo12(.LCPI2_20)
- addi.w $s0, $zero, -1
- lu52i.d $a0, $s0, 2047
+ addi.w $a0, $zero, -1
+ lu52i.d $a0, $a0, 2047
 vreplgr2vr.d $vr1, $a0
 vfcmp.sne.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 112
@@ -632,9 +628,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu32i.d $s0, 0
- vreplgr2vr.d $vr0, $s0
- vst $vr0, $sp, 48 # 16-byte Folded Spill
+ vldi $vr0, -1777
 vst $vr0, $sp, 96
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vfcmp.sune.s $vr0, $vr0, $vr1
@@ -677,7 +671,7 @@ main: # @main
 vld $vr0, $sp, 80 # 16-byte Folded Reload
 vst $vr0, $sp, 96
 vfcmp.sune.s $vr0, $vr0, $vr0
- vst $vr0, $sp, 16 # 16-byte Folded Spill
+ vst $vr0, $sp, 32 # 16-byte Folded Spill
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 112
@@ -688,7 +682,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 80 # 16-byte Folded Reload
 vst $vr0, $sp, 96
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vld $vr0, $sp, 32 # 16-byte Folded Reload
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 112
@@ -699,7 +693,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_23)
 vld $vr0, $a0, %pc_lo12(.LCPI2_23)
- vld $vr1, $sp, 48 # 16-byte Folded Reload
+ vldi $vr1, -1777
 vst $vr1, $sp, 96
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vfcmp.sune.s $vr0, $vr1, $vr0
@@ -778,7 +772,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_27)
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vst $vr1, $sp, 96
- vld $vr1, $sp, 48 # 16-byte Folded Reload
+ vldi $vr1, -1777
 vfcmp.sune.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
@@ -820,7 +814,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_29)
 pcalau12i $a0, %pc_hi20(.LCPI2_30)
 vld $vr1, $a0, %pc_lo12(.LCPI2_30)
- vld $vr2, $sp, 32 # 16-byte Folded Reload
+ vld $vr2, $sp, 48 # 16-byte Folded Reload
 vst $vr2, $sp, 96
 vfcmp.sune.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
@@ -846,10 +840,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 136 # 8-byte Folded Reload
- ld.d $fp, $sp, 144 # 8-byte Folded Reload
- ld.d $ra, $sp, 152 # 8-byte Folded Reload
- addi.d $sp, $sp, 160
+ ld.d $fp, $sp, 128 # 8-byte Folded Reload
+ ld.d $ra, $sp, 136 # 8-byte Folded Reload
+ addi.d $sp, $sp, 144
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sor.dir/lsx-vfcmp_sor.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sor.dir/lsx-vfcmp_sor.s
index 4d3e887f..88fd0359 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sor.dir/lsx-vfcmp_sor.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sor.dir/lsx-vfcmp_sor.s
@@ -344,9 +344,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 80 # 16-byte Folded Reload
 vst $vr0, $sp, 96
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1777
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vfcmp.sor.s $vr0, $vr0, $vr1
 vst $vr0, $sp, 112
@@ -512,8 +510,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_12)
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vst $vr1, $sp, 96
- lu12i.w $a0, -524288
- vreplgr2vr.w $vr1, $a0
+ vldi $vr1, -3200
 vfcmp.sor.s $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sun.dir/lsx-vfcmp_sun.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sun.dir/lsx-vfcmp_sun.s
index 4f4acc44..d6adae8d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sun.dir/lsx-vfcmp_sun.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcmp_sun.dir/lsx-vfcmp_sun.s
@@ -356,10 +356,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -144
- st.d $ra, $sp, 136 # 8-byte Folded Spill
- st.d $fp, $sp, 128 # 8-byte Folded Spill
- st.d $s0, $sp, 120 # 8-byte Folded Spill
+ addi.d $sp, $sp, -128
+ st.d $ra, $sp, 120 # 8-byte Folded Spill
+ st.d $fp, $sp, 112 # 8-byte Folded Spill
 vrepli.b $vr0, 0
 vst $vr0, $sp, 64 # 16-byte Folded Spill
 vst $vr0, $sp, 80
@@ -445,14 +444,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $s0, $zero, 0
- ori $a0, $zero, 0
- pcalau12i $a1, %pc_hi20(.LCPI2_3)
- vld $vr0, $a1, %pc_lo12(.LCPI2_3)
- pcalau12i $a1, %pc_hi20(.LCPI2_4)
- vld $vr1, $a1, %pc_lo12(.LCPI2_4)
- lu32i.d $a0, -1
- vreplgr2vr.d $vr2, $a0
+ pcalau12i $a0, %pc_hi20(.LCPI2_3)
+ vld $vr0, $a0, %pc_lo12(.LCPI2_3)
+ pcalau12i $a0, %pc_hi20(.LCPI2_4)
+ vld $vr1, $a0, %pc_lo12(.LCPI2_4)
+ vldi $vr2, -1552
 vst $vr2, $sp, 80
 vfcmp.sun.s $vr0, $vr1, $vr0
 vst $vr0, $sp, 96
@@ -615,12 +611,13 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
+ vld $vr0, $sp, 64 # 16-byte Folded Reload
+ vst $vr0, $sp, 80
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
 vld $vr0, $a0, %pc_lo12(.LCPI2_20)
- vld $vr1, $sp, 64 # 16-byte Folded Reload
- vst $vr1, $sp, 80
- lu32i.d $s0, 131072
- vreplgr2vr.d $vr1, $s0
+ ori $a0, $zero, 0
+ lu32i.d $a0, 131072
+ vreplgr2vr.d $vr1, $a0
 vfcmp.sun.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
@@ -743,10 +740,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 120 # 8-byte Folded Reload
- ld.d $fp, $sp, 128 # 8-byte Folded Reload
- ld.d $ra, $sp, 136 # 8-byte Folded Reload
- addi.d $sp, $sp, 144
+ ld.d $fp, $sp, 112 # 8-byte Folded Reload
+ ld.d $ra, $sp, 120 # 8-byte Folded Reload
+ addi.d $sp, $sp, 128
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcvt-1.dir/lsx-vfcvt-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcvt-1.dir/lsx-vfcvt-1.s
index 849a8cdc..32a41804 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcvt-1.dir/lsx-vfcvt-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfcvt-1.dir/lsx-vfcvt-1.s
@@ -533,9 +533,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_2)
 vld $vr0, $a0, %pc_lo12(.LCPI2_2)
 vst $vr0, $sp, 64
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1777
 vfcvtl.s.h $vr0, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-2.dir/lsx-vffint-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-2.dir/lsx-vffint-2.s
index 692e2465..eb070c6e 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-2.dir/lsx-vffint-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-2.dir/lsx-vffint-2.s
@@ -536,8 +536,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu52i.d $a0, $zero, -1025
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -784
 vst $vr0, $sp, 64
 vrepli.b $vr0, -1
 vffint.d.l $vr0, $vr0
@@ -701,13 +700,11 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_26)
 vld $vr0, $a0, %pc_lo12(.LCPI2_26)
- vst $vr0, $sp, 48
 pcalau12i $a0, %pc_hi20(.LCPI2_27)
- vld $vr0, $a0, %pc_lo12(.LCPI2_27)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr1, $a0
- vffint.s.l $vr0, $vr1, $vr0
+ vld $vr1, $a0, %pc_lo12(.LCPI2_27)
+ vst $vr0, $sp, 48
+ vldi $vr0, -2305
+ vffint.s.l $vr0, $vr0, $vr1
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 48
 addi.d $a1, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-3.dir/lsx-vffint-3.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-3.dir/lsx-vffint-3.s
index 2dc8a8d4..43ba29df 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-3.dir/lsx-vffint-3.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vffint-3.dir/lsx-vffint-3.s
@@ -268,8 +268,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 269056
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -1482
 vst $vr0, $sp, 48
 vrepli.w $vr0, 22
 vffint.s.wu $vr0, $vr0
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_d.dir/lsx-vfmax_d.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_d.dir/lsx-vfmax_d.s
index a903a472..8f016b45 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_d.dir/lsx-vfmax_d.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_d.dir/lsx-vfmax_d.s
@@ -299,8 +299,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
- ori $a0, $zero, 1024
- vreplgr2vr.h $vr1, $a0
+ vldi $vr1, -2812
 vst $vr1, $sp, 64
 vfmax.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_s.dir/lsx-vfmax_s.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_s.dir/lsx-vfmax_s.s
index 33905c0a..9112d9c2 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_s.dir/lsx-vfmax_s.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfmax_s.dir/lsx-vfmax_s.s
@@ -594,13 +594,11 @@ main: # @main
 lu32i.d $a1, 0
 vreplgr2vr.d $vr0, $a1
 vst $vr0, $sp, 48
- addi.w $a1, $zero, -1
- lu32i.d $a1, 0
- vreplgr2vr.d $vr0, $a1
 lu32i.d $a0, 152146
 lu52i.d $a0, $a0, 1317
- vreplgr2vr.d $vr1, $a0
- vfmin.s $vr0, $vr1, $vr0
+ vreplgr2vr.d $vr0, $a0
+ vldi $vr1, -1777
+ vfmin.s $vr0, $vr0, $vr1
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
 addi.d $a1, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_d.dir/lsx-vfnmadd_d.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_d.dir/lsx-vfnmadd_d.s
index 51407a6f..c19984b2 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_d.dir/lsx-vfnmadd_d.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_d.dir/lsx-vfnmadd_d.s
@@ -334,12 +334,12 @@ main: # @main
 jirl $ra, $ra, 0
 ori $a0, $zero, 0
 lu32i.d $a0, 65535
- lu52i.d $a1, $a0, -2048
- pcalau12i $a2, %pc_hi20(.LCPI2_3)
- vld $vr0, $a2, %pc_lo12(.LCPI2_3)
- vreplgr2vr.d $vr1, $a1
- vst $vr1, $sp, 48
+ lu52i.d $a0, $a0, -2048
+ pcalau12i $a1, %pc_hi20(.LCPI2_3)
+ vld $vr0, $a1, %pc_lo12(.LCPI2_3)
 vreplgr2vr.d $vr1, $a0
+ vst $vr1, $sp, 48
+ vldi $vr1, -1744
 vld $vr2, $sp, 32 # 16-byte Folded Reload
 vfnmadd.d $vr0, $vr2, $vr0, $vr1
 vst $vr0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_s.dir/lsx-vfnmadd_s.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_s.dir/lsx-vfnmadd_s.s
index d7ebe9a3..b467da8f 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_s.dir/lsx-vfnmadd_s.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfnmadd_s.dir/lsx-vfnmadd_s.s
@@ -409,22 +409,22 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -112
- st.d $ra, $sp, 104 # 8-byte Folded Spill
- st.d $fp, $sp, 96 # 8-byte Folded Spill
+ addi.d $sp, $sp, -96
+ st.d $ra, $sp, 88 # 8-byte Folded Spill
+ st.d $fp, $sp, 80 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr1, $a0, %pc_lo12(.LCPI2_1)
 pcalau12i $a0, %pc_hi20(.LCPI2_2)
 vld $vr2, $a0, %pc_lo12(.LCPI2_2)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmadd.s $vr0, $vr2, $vr1, $vr2
- vst $vr0, $sp, 80
+ vst $vr0, $sp, 64
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 24
 move $a3, $fp
@@ -434,13 +434,13 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_3)
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 vld $vr1, $a0, %pc_lo12(.LCPI2_4)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vrepli.b $vr0, 0
- vst $vr0, $sp, 48 # 16-byte Folded Spill
+ vst $vr0, $sp, 32 # 16-byte Folded Spill
 vfnmadd.s $vr0, $vr1, $vr0, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 31
 move $a3, $fp
@@ -448,17 +448,15 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
 vld $vr0, $a0, %pc_lo12(.LCPI2_5)
- vst $vr0, $sp, 64
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
- vld $vr0, $a0, %pc_lo12(.LCPI2_6)
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr1, $a0
- vld $vr2, $sp, 48 # 16-byte Folded Reload
- vfnmadd.s $vr0, $vr2, $vr1, $vr0
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vld $vr1, $a0, %pc_lo12(.LCPI2_6)
+ vst $vr0, $sp, 48
+ vldi $vr0, -1777
+ vld $vr2, $sp, 32 # 16-byte Folded Reload
+ vfnmadd.s $vr0, $vr2, $vr0, $vr1
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 38
 move $a3, $fp
@@ -470,11 +468,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_8)
 pcalau12i $a0, %pc_hi20(.LCPI2_9)
 vld $vr2, $a0, %pc_lo12(.LCPI2_9)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmadd.s $vr0, $vr2, $vr1, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 45
 move $a3, $fp
@@ -488,26 +486,24 @@ main: # @main
 vld $vr2, $a0, %pc_lo12(.LCPI2_12)
 pcalau12i $a0, %pc_hi20(.LCPI2_13)
 vld $vr3, $a0, %pc_lo12(.LCPI2_13)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmadd.s $vr0, $vr3, $vr2, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 52
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, -524288
- vreplgr2vr.w $vr0, $a0
- vst $vr0, $sp, 32 # 16-byte Folded Spill
- vst $vr0, $sp, 64
- vld $vr0, $sp, 48 # 16-byte Folded Reload
+ vldi $vr0, -3200
+ vst $vr0, $sp, 48
+ vld $vr0, $sp, 32 # 16-byte Folded Reload
 vfnmadd.s $vr0, $vr0, $vr0, $vr0
 vst $vr0, $sp, 16 # 16-byte Folded Spill
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 59
 move $a3, $fp
@@ -517,13 +513,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_14)
 pcalau12i $a0, %pc_hi20(.LCPI2_15)
 vld $vr1, $a0, %pc_lo12(.LCPI2_15)
- vst $vr0, $sp, 64
- lu12i.w $a0, 260096
- vreplgr2vr.w $vr0, $a0
+ vst $vr0, $sp, 48
+ vldi $vr0, -1424
 vfnmadd.s $vr0, $vr1, $vr0, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 66
 move $a3, $fp
@@ -535,11 +530,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_17)
 pcalau12i $a0, %pc_hi20(.LCPI2_18)
 vld $vr2, $a0, %pc_lo12(.LCPI2_18)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmadd.s $vr0, $vr2, $vr2, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 73
 move $a3, $fp
@@ -551,22 +546,22 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_20)
 pcalau12i $a0, %pc_hi20(.LCPI2_21)
 vld $vr2, $a0, %pc_lo12(.LCPI2_21)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmadd.s $vr0, $vr2, $vr2, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 80
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 32 # 16-byte Folded Reload
- vst $vr0, $sp, 64
+ vldi $vr0, -3200
+ vst $vr0, $sp, 48
 vld $vr0, $sp, 16 # 16-byte Folded Reload
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 87
 move $a3, $fp
@@ -574,13 +569,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_22)
 vld $vr0, $a0, %pc_lo12(.LCPI2_22)
+ vldi $vr1, -3200
+ vst $vr1, $sp, 48
 vld $vr1, $sp, 32 # 16-byte Folded Reload
- vst $vr1, $sp, 64
- vld $vr1, $sp, 48 # 16-byte Folded Reload
 vfnmadd.s $vr0, $vr0, $vr1, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 94
 move $a3, $fp
@@ -588,13 +583,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_23)
 vld $vr0, $a0, %pc_lo12(.LCPI2_23)
+ vldi $vr1, -3200
+ vst $vr1, $sp, 48
 vld $vr1, $sp, 32 # 16-byte Folded Reload
- vst $vr1, $sp, 64
- vld $vr1, $sp, 48 # 16-byte Folded Reload
 vfnmadd.s $vr0, $vr0, $vr1, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 101
 move $a3, $fp
@@ -606,22 +601,22 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_25)
 pcalau12i $a0, %pc_hi20(.LCPI2_26)
 vld $vr2, $a0, %pc_lo12(.LCPI2_26)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmadd.s $vr0, $vr2, $vr2, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 108
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 32 # 16-byte Folded Reload
- vst $vr0, $sp, 64
+ vldi $vr0, -3200
+ vst $vr0, $sp, 48
 vld $vr0, $sp, 16 # 16-byte Folded Reload
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 115
 move $a3, $fp
@@ -635,23 +630,23 @@ main: # @main
 vld $vr2, $a0, %pc_lo12(.LCPI2_29)
 pcalau12i $a0, %pc_hi20(.LCPI2_30)
 vld $vr3, $a0, %pc_lo12(.LCPI2_30)
- vst $vr0, $sp, 64
+ vst $vr0, $sp, 48
 vfnmsub.s $vr0, $vr3, $vr2, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 122
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
+ vldi $vr0, -3200
+ vst $vr0, $sp, 48
 vld $vr0, $sp, 32 # 16-byte Folded Reload
- vst $vr0, $sp, 64
- vld $vr0, $sp, 48 # 16-byte Folded Reload
 vfnmsub.s $vr0, $vr0, $vr0, $vr0
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 129
 move $a3, $fp
@@ -661,12 +656,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_31)
 pcalau12i $a0, %pc_hi20(.LCPI2_32)
 vld $vr1, $a0, %pc_lo12(.LCPI2_32)
- vst $vr0, $sp, 64
- vld $vr0, $sp, 48 # 16-byte Folded Reload
+ vst $vr0, $sp, 48
+ vld $vr0, $sp, 32 # 16-byte Folded Reload
 vfnmsub.s $vr0, $vr0, $vr0, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 136
 move $a3, $fp
@@ -676,12 +671,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_33)
 pcalau12i $a0, %pc_hi20(.LCPI2_34)
 vld $vr1, $a0, %pc_lo12(.LCPI2_34)
- vst $vr0, $sp, 64
- vld $vr0, $sp, 48 # 16-byte Folded Reload
+ vst $vr0, $sp, 48
+ vld $vr0, $sp, 32 # 16-byte Folded Reload
 vfnmsub.s $vr0, $vr0, $vr0, $vr1
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 143
 move $a3, $fp
@@ -690,21 +685,21 @@ main: # @main
 lu12i.w $a0, -207306
 ori $a0, $a0, 867
 vreplgr2vr.w $vr0, $a0
- vst $vr0, $sp, 64
- vld $vr1, $sp, 48 # 16-byte Folded Reload
+ vst $vr0, $sp, 48
+ vld $vr1, $sp, 32 # 16-byte Folded Reload
 vfnmsub.s $vr0, $vr1, $vr1, $vr0
- vst $vr0, $sp, 80
- addi.d $a0, $sp, 64
- addi.d $a1, $sp, 80
+ vst $vr0, $sp, 64
+ addi.d $a0, $sp, 48
+ addi.d $a1, $sp, 64
 ori $a2, $zero, 16
 ori $a4, $zero, 150
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $fp, $sp, 96 # 8-byte Folded Reload
- ld.d $ra, $sp, 104 # 8-byte Folded Reload
- addi.d $sp, $sp, 112
+ ld.d $fp, $sp, 80 # 8-byte Folded Reload
+ ld.d $ra, $sp, 88 # 8-byte Folded Reload
+ addi.d $sp, $sp, 96
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_d.dir/lsx-vfrint_d.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_d.dir/lsx-vfrint_d.s
index 4e31e0f3..9e8152fa 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_d.dir/lsx-vfrint_d.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_d.dir/lsx-vfrint_d.s
@@ -645,8 +645,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_13)
 vld $vr0, $a0, %pc_lo12(.LCPI2_13)
- lu52i.d $a0, $zero, 1023
- vreplgr2vr.d $vr1, $a0
+ vldi $vr1, -912
 vst $vr1, $sp, 96
 vfrintrp.d $vr0, $vr0
 vst $vr0, $sp, 112
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_s.dir/lsx-vfrint_s.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_s.dir/lsx-vfrint_s.s
index 367ff109..ef41ba5c 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_s.dir/lsx-vfrint_s.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfrint_s.dir/lsx-vfrint_s.s
@@ -668,8 +668,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_14)
 vld $vr0, $a0, %pc_lo12(.LCPI2_14)
- lu12i.w $a0, 260096
- vreplgr2vr.d $vr1, $a0
+ vldi $vr1, -1168
 vst $vr1, $sp, 96
 vfrintrp.s $vr0, $vr0
 vst $vr0, $sp, 112
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfsqrt_s.dir/lsx-vfsqrt_s.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfsqrt_s.dir/lsx-vfsqrt_s.s
index e30434c1..fb47e9da 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfsqrt_s.dir/lsx-vfsqrt_s.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vfsqrt_s.dir/lsx-vfsqrt_s.s
@@ -531,11 +531,9 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 479232
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3211
 vst $vr0, $sp, 64
- lu12i.w $a0, 40960
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3318
 vfrecip.s $vr0, $vr0
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-3.dir/lsx-vftint-3.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-3.dir/lsx-vftint-3.s
index 8a26414b..c4cd7de5 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-3.dir/lsx-vftint-3.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-3.dir/lsx-vftint-3.s
@@ -567,7 +567,6 @@ main: # @main
 st.d $ra, $sp, 184 # 8-byte Folded Spill
 st.d $fp, $sp, 176 # 8-byte Folded Spill
 st.d $s0, $sp, 168 # 8-byte Folded Spill
- st.d $s1, $sp, 160 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
@@ -1069,16 +1068,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $s1, -524288
- move $a0, $s1
+ lu12i.w $a0, -524288
 lu32i.d $a0, 0
 vreplgr2vr.d $vr0, $a0
 vst $vr0, $sp, 128
- lu12i.w $a0, -4081
- ori $a0, $a0, 3840
- lu32i.d $a0, -256
- lu52i.d $a0, $a0, 15
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1686
 vftintrm.w.s $vr0, $vr0
 vst $vr0, $sp, 144
 addi.d $a0, $sp, 128
@@ -1788,7 +1782,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_78)
 vld $vr0, $a0, %pc_lo12(.LCPI2_78)
- vreplgr2vr.w $vr1, $s1
+ vldi $vr1, -3200
 vst $vr1, $sp, 128
 vftintrp.w.d $vr0, $vr0, $vr0
 vst $vr0, $sp, 144
@@ -1825,8 +1819,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 112 # 16-byte Folded Reload
 vst $vr1, $sp, 128
- ori $a0, $zero, 2048
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2808
 vftintrm.w.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 144
 addi.d $a0, $sp, 128
@@ -1851,13 +1844,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
 pcalau12i $a0, %pc_hi20(.LCPI2_81)
 vld $vr0, $a0, %pc_lo12(.LCPI2_81)
- lu12i.w $a0, 4080
- ori $a0, $a0, 255
- vreplgr2vr.d $vr1, $a0
+ vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vst $vr1, $sp, 128
+ vldi $vr1, -1787
 vftintrm.w.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 144
 addi.d $a0, $sp, 128
@@ -1982,12 +1973,9 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $s0, $zero, -1
 vld $vr1, $sp, 112 # 16-byte Folded Reload
 vst $vr1, $sp, 128
- move $a0, $s0
- lu32i.d $a0, 65535
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1729
 vftint.w.d $vr0, $vr1, $vr0
 vst $vr0, $sp, 144
 addi.d $a0, $sp, 128
@@ -2036,8 +2024,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_88)
 vld $vr1, $sp, 112 # 16-byte Folded Reload
 vst $vr1, $sp, 128
- lu32i.d $s0, 0
- vreplgr2vr.d $vr1, $s0
+ vldi $vr1, -1777
 vftint.w.d $vr0, $vr0, $vr1
 vst $vr0, $sp, 144
 addi.d $a0, $sp, 128
@@ -2048,7 +2035,6 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s1, $sp, 160 # 8-byte Folded Reload
 ld.d $s0, $sp, 168 # 8-byte Folded Reload
 ld.d $fp, $sp, 176 # 8-byte Folded Reload
 ld.d $ra, $sp, 184 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-4.dir/lsx-vftint-4.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-4.dir/lsx-vftint-4.s
index 01aa0274..8f175c02 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-4.dir/lsx-vftint-4.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vftint-4.dir/lsx-vftint-4.s
@@ -569,8 +569,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 64 # 16-byte Folded Reload
 vst $vr0, $sp, 80
- lu12i.w $a0, 8
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2688
 vftintrz.lu.d $vr0, $vr0
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-1.dir/lsx-vhaddw-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-1.dir/lsx-vhaddw-1.s
index 5e31edf9..4c398e7d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-1.dir/lsx-vhaddw-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-1.dir/lsx-vhaddw-1.s
@@ -646,13 +645,12 @@ main: # @main
 vreplgr2vr.d $vr0, $a0
 vst $vr0, $sp, 16 # 16-byte Folded Spill
 vst $vr0, $sp, 80
- lu52i.d $a0, $zero, 1023
- vreplgr2vr.d $vr0, $a0
 lu12i.w $a0, 524287
 ori $a0, $a0, 4095
 lu52i.d $a0, $a0, -2048
- vreplgr2vr.d $vr1, $a0
- vhaddw.h.b $vr0, $vr0, $vr1
+ vreplgr2vr.d $vr0, $a0
+ vldi $vr1, -912
+ vhaddw.h.b $vr0, $vr1, $vr0
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
 addi.d $a1, $sp, 96
@@ -843,8 +842,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_22)
 vld $vr1, $a0, %pc_lo12(.LCPI2_22)
 vst $vr0, $sp, 80
- lu12i.w $a0, 479232
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3211
 vhaddw.w.h $vr0, $vr1, $vr0
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-2.dir/lsx-vhaddw-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-2.dir/lsx-vhaddw-2.s
index f50b199f..942b1771 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-2.dir/lsx-vhaddw-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vhaddw-2.dir/lsx-vhaddw-2.s
@@ -621,10 +621,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -176
- st.d $ra, $sp, 168 # 8-byte Folded Spill
- st.d $fp, $sp, 160 # 8-byte Folded Spill
- st.d $s0, $sp, 152 # 8-byte Folded Spill
+ addi.d $sp, $sp, -160
+ st.d $ra, $sp, 152 # 8-byte Folded Spill
+ st.d $fp, $sp, 144 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
@@ -948,8 +947,8 @@ main: # @main
 vst $vr0, $sp, 112
 pcalau12i $a0, %pc_hi20(.LCPI2_34)
 vld $vr0, $a0, %pc_lo12(.LCPI2_34)
- addi.w $s0, $zero, -1
- lu52i.d $a0, $s0, -17
+ addi.w $a0, $zero, -1
+ lu52i.d $a0, $a0, -17
 vreplgr2vr.d $vr1, $a0
 vhaddw.du.wu $vr0, $vr0, $vr1
 vst $vr0, $sp, 128
@@ -1124,8 +1123,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu32i.d $s0, 0
- vreplgr2vr.d $vr0, $s0
+ vldi $vr0, -1777
 vst $vr0, $sp, 112
 vld $vr0, $sp, 96 # 16-byte Folded Reload
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 move $a0, $zero
- ld.d $s0, $sp, 88 # 8-byte Folded Reload
- ld.d $fp, $sp, 96 # 8-byte Folded Reload
- ld.d $ra, $sp, 104 # 8-byte Folded Reload
- addi.d $sp, $sp, 112
+ ld.d $fp, $sp, 80 # 8-byte Folded Reload
+ ld.d $ra, $sp, 88 # 8-byte Folded Reload
+ addi.d $sp, $sp, 96
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwev-3.dir/lsx-vmaddwev-3.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwev-3.dir/lsx-vmaddwev-3.s
index c64c8014..16877b69 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwev-3.dir/lsx-vmaddwev-3.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwev-3.dir/lsx-vmaddwev-3.s
@@ -786,9 +786,7 @@ main: # @main
 lu12i.w $a0, 991
 ori $a0, $a0, 4034
 vreplgr2vr.d $vr0, $a0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr1, $a0
+ vldi $vr1, -1552
 vmaddwev.d.wu.w $vr1, $vr0, $vr0
 vst $vr1, $sp, 64
 addi.d $a0, $sp, 48
@@ -866,14 +864,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_28)
 vld $vr0, $a0, %pc_lo12(.LCPI2_28)
- vst $vr0, $sp, 48
 pcalau12i $a0, %pc_hi20(.LCPI2_29)
- vld $vr0, $a0, %pc_lo12(.LCPI2_29)
- addi.w $a0, $zero, -1
- lu32i.d $a0, 65535
- vreplgr2vr.d $vr1, $a0
- vmaddwev.d.wu.w $vr1, $vr1, $vr0
- vst $vr1, $sp, 64
+ vld $vr1, $a0, %pc_lo12(.LCPI2_29)
+ vst $vr0, $sp, 48
+ vldi $vr0, -1729
+ vmaddwev.d.wu.w $vr0, $vr0, $vr1
+ vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
 addi.d $a1, $sp, 64
 ori $a2, $zero, 16
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-1.dir/lsx-vmaddwod-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-1.dir/lsx-vmaddwod-1.s
index 88a58370..562bedd1 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-1.dir/lsx-vmaddwod-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-1.dir/lsx-vmaddwod-1.s
@@ -649,10 +649,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -176
- st.d $ra, $sp, 168 # 8-byte Folded Spill
- st.d $fp, $sp, 160 # 8-byte Folded Spill
- st.d $s0, $sp, 152 # 8-byte Folded Spill
+ addi.d $sp, $sp, -160
+ st.d $ra, $sp, 152 # 8-byte Folded Spill
+ st.d $fp, $sp, 144 # 8-byte Folded Spill
 vrepli.b $vr0, 0
 vst $vr0, $sp, 96 # 16-byte Folded Spill
 vst $vr0, $sp, 112
@@ -948,9 +947,8 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_38)
 vst $vr0, $sp, 112
 pcalau12i $a0, %pc_hi20(.LCPI2_39)
- addi.w $s0, $zero, -1
 vld $vr1, $a0, %pc_lo12(.LCPI2_39)
- move $a0, $s0
+ addi.w $a0, $zero, -1
 lu32i.d $a0, -65536
 lu52i.d $a0, $a0, 3
 vreplgr2vr.d $vr2, $a0
@@ -968,8 +966,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_41)
 vld $vr1, $a0, %pc_lo12(.LCPI2_41)
 vst $vr0, $sp, 112
- lu32i.d $s0, 65535
- vreplgr2vr.d $vr0, $s0
+ vldi $vr0, -1729
 vld $vr2, $sp, 96 # 16-byte Folded Reload
 vmaddwod.d.w $vr2, $vr1, $vr0
 vst $vr2, $sp, 128
@@ -1197,14 +1194,13 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vrepli.b $vr0, -2
 pcalau12i $a0, %pc_hi20(.LCPI2_64)
- vld $vr1, $a0, %pc_lo12(.LCPI2_64)
- vst $vr0, $sp, 112
- lu12i.w $a0, 1
- vreplgr2vr.h $vr2, $a0
- vmaddwod.q.d $vr0, $vr2, $vr1
- vst $vr0, $sp, 128
+ vld $vr0, $a0, %pc_lo12(.LCPI2_64)
+ vrepli.b $vr1, -2
+ vst $vr1, $sp, 112
+ vldi $vr2, -2800
+ vmaddwod.q.d $vr1, $vr2, $vr0
+ vst $vr1, $sp, 128
 addi.d $a0, $sp, 112
 addi.d $a1, $sp, 128
 ori $a2, $zero, 16
@@ -1484,10 +1480,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 152 # 8-byte Folded Reload
- ld.d $fp, $sp, 160 # 8-byte Folded Reload
- ld.d $ra, $sp, 168 # 8-byte Folded Reload
- addi.d $sp, $sp, 176
+ ld.d $fp, $sp, 144 # 8-byte Folded Reload
+ ld.d $ra, $sp, 152 # 8-byte Folded Reload
+ addi.d $sp, $sp, 160
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-3.dir/lsx-vmaddwod-3.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-3.dir/lsx-vmaddwod-3.s
index c3d499bb..6cd1cd61 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-3.dir/lsx-vmaddwod-3.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaddwod-3.dir/lsx-vmaddwod-3.s
@@ -729,10 +729,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -160
- st.d $ra, $sp, 152 # 8-byte Folded Spill
- st.d $fp, $sp, 144 # 8-byte Folded Spill
- st.d $s0, $sp, 136 # 8-byte Folded Spill
+ addi.d $sp, $sp, -144
+ st.d $ra, $sp, 136 # 8-byte Folded Spill
+ st.d $fp, $sp, 128 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 vrepli.b $vr1, 0
@@ -847,7 +846,6 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 80 # 16-byte Folded Reload
 vst $vr1, $sp, 96
- ori $s0, $zero, 0
 ori $a0, $zero, 0
 lu32i.d $a0, 32768
 vreplgr2vr.d $vr0, $a0
@@ -946,8 +944,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_25)
 vld $vr0, $a0, %pc_lo12(.LCPI2_25)
 vst $vr0, $sp, 96
- lu32i.d $s0, -1
- vreplgr2vr.d $vr1, $s0
+ vldi $vr1, -1552
 vld $vr2, $sp, 80 # 16-byte Folded Reload
 vmaddwod.w.hu.h $vr0, $vr1, $vr2
 vst $vr0, $sp, 112
@@ -1183,11 +1180,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 260096
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -1424
 vst $vr0, $sp, 96
- pcalau12i $a1, %pc_hi20(.LCPI2_58)
- vld $vr1, $a1, %pc_lo12(.LCPI2_58)
+ pcalau12i $a0, %pc_hi20(.LCPI2_58)
+ vld $vr1, $a0, %pc_lo12(.LCPI2_58)
+ lu12i.w $a0, 260096
 ori $a0, $a0, 1
 lu52i.d $a0, $a0, 1016
 vreplgr2vr.d $vr2, $a0
@@ -1249,10 +1246,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 136 # 8-byte Folded Reload
- ld.d $fp, $sp, 144 # 8-byte Folded Reload
- ld.d $ra, $sp, 152 # 8-byte Folded Reload
- addi.d $sp, $sp, 160
+ ld.d $fp, $sp, 128 # 8-byte Folded Reload
+ ld.d $ra, $sp, 136 # 8-byte Folded Reload
+ addi.d $sp, $sp, 144
 ret
 .Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaxi-1.dir/lsx-vmaxi-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaxi-1.dir/lsx-vmaxi-1.s
index d5cee972..fd862017 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaxi-1.dir/lsx-vmaxi-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmaxi-1.dir/lsx-vmaxi-1.s
@@ -306,8 +306,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
@@ -560,9 +559,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 511
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -2273
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmod-1.dir/lsx-vmod-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmod-1.dir/lsx-vmod-1.s
index a06068cb..a5a43498 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmod-1.dir/lsx-vmod-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmod-1.dir/lsx-vmod-1.s
@@ -354,8 +354,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmuh-2.dir/lsx-vmuh-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmuh-2.dir/lsx-vmuh-2.s
index 72fec4fe..ffbff29c 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmuh-2.dir/lsx-vmuh-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmuh-2.dir/lsx-vmuh-2.s
@@ -975,13 +975,11 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_36)
 vld $vr0, $a0, %pc_lo12(.LCPI2_36)
- vst $vr0, $sp, 96
 pcalau12i $a0, %pc_hi20(.LCPI2_37)
- vld $vr0, $a0, %pc_lo12(.LCPI2_37)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr1, $a0
- vmuh.du $vr0, $vr0, $vr1
+ vld $vr1, $a0, %pc_lo12(.LCPI2_37)
+ vst $vr0, $sp, 96
+ vldi $vr0, -2305
+ vmuh.du $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 112
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-1.dir/lsx-vmulwev-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-1.dir/lsx-vmulwev-1.s
index a95e057e..c478144b 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-1.dir/lsx-vmulwev-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-1.dir/lsx-vmulwev-1.s
@@ -853,9 +853,7 @@ main: # @main
 lu32i.d $a0, -255
 vreplgr2vr.d $vr0, $a0
 vst $vr0, $sp, 96
- addi.w $a0, $zero, -1
- lu32i.d $a0, 255
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1761
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vmulwev.w.h $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-2.dir/lsx-vmulwev-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-2.dir/lsx-vmulwev-2.s
index da488ab2..2eadcc98 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-2.dir/lsx-vmulwev-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwev-2.dir/lsx-vmulwev-2.s
@@ -756,8 +756,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 80 # 16-byte Folded Reload
 vst $vr0, $sp, 96
- lu12i.w $a0, -524288
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3200
 vmulwev.h.bu $vr0, $vr0, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-1.dir/lsx-vmulwod-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-1.dir/lsx-vmulwod-1.s
index 435e3586..84efbceb 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-1.dir/lsx-vmulwod-1.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-1.dir/lsx-vmulwod-1.s
@@ -721,13 +721,12 @@ main: # @main
 lu32i.d $s0, 16256
 vreplgr2vr.d $vr0, $s0
 vst $vr0, $sp, 64
- lu12i.w $a0, 260096
- vreplgr2vr.w $vr0, $a0
 lu12i.w $a0, 16
 ori $a0, $a0, 1
 lu32i.d $a0, 65536
- vreplgr2vr.d $vr1, $a0
- vmulwod.d.w $vr0, $vr0, $vr1
+ vreplgr2vr.d $vr0, $a0
+ vldi $vr1, -1424
+ vmulwod.d.w $vr0, $vr1, $vr0
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
 addi.d $a1, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-2.dir/lsx-vmulwod-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-2.dir/lsx-vmulwod-2.s
index 0130ee61..aca4ed16 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-2.dir/lsx-vmulwod-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vmulwod-2.dir/lsx-vmulwod-2.s
@@ -1099,9 +1099,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 48 # 16-byte Folded Reload
 vst $vr1, $sp, 64
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1552
 vmulwod.q.du $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
@@ -1147,8 +1145,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_51)
 vld $vr1, $a0, %pc_lo12(.LCPI2_51)
 vst $vr0, $sp, 64
- lu12i.w $a0, 40960
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3318
 vmulwod.q.du $vr0, $vr0, $vr1
 vst $vr0, $sp, 80
 addi.d $a0, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vnori.dir/lsx-vnori.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vnori.dir/lsx-vnori.s
index d05afbdd..d7c8336d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vnori.dir/lsx-vnori.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vnori.dir/lsx-vnori.s
@@ -323,9 +323,7 @@ main: # @main
 lu52i.d $a0, $a0, -820
 vreplgr2vr.d $vr0, $a0
 vst $vr0, $sp, 48
- lu12i.w $a0, -16
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1780
 vnori.b $vr0, $vr0, 51
 vst $vr0, $sp, 64
 pcalau12i $a0, %pc_hi20(.L.str.5)
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackev.dir/lsx-vpackev.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackev.dir/lsx-vpackev.s
index bca78dd1..4db37411 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackev.dir/lsx-vpackev.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackev.dir/lsx-vpackev.s
@@ -662,22 +662,22 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
 main: # @main
 # %bb.0:
- addi.d $sp, $sp, -144
- st.d $ra, $sp, 136 # 8-byte Folded Spill
- st.d $fp, $sp, 128 # 8-byte Folded Spill
+ addi.d $sp, $sp, -128
+ st.d $ra, $sp, 120 # 8-byte Folded Spill
+ st.d $fp, $sp, 112 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr1, $a0, %pc_lo12(.LCPI2_1)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 vrepli.b $vr0, 0
- vst $vr0, $sp, 80 # 16-byte Folded Spill
+ vst $vr0, $sp, 64 # 16-byte Folded Spill
 vpackev.b $vr0, $vr0, $vr1
- vst $vr0, $sp, 112
+ vst $vr0, $sp, 96
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 23
 move $a3, $fp
@@ -687,12 +687,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_2)
 pcalau12i $a0, %pc_hi20(.LCPI2_3)
 vld $vr1, $a0, %pc_lo12(.LCPI2_3)
- vst $vr0, $sp, 96
- vld $vr0, $sp, 80 # 16-byte Folded Reload
+ vst $vr0, $sp, 80
+ vld $vr0, $sp, 64 # 16-byte Folded Reload
 vpackev.b $vr0, $vr0, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 29
 move $a3, $fp
@@ -700,15 +700,15 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 vld $vr1, $a0, %pc_lo12(.LCPI2_4)
- vst $vr1, $sp, 48 # 16-byte Folded Spill
+ vst $vr1, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
 vld $vr0, $a0, %pc_lo12(.LCPI2_5)
- vst $vr1, $sp, 96
- vld $vr1, $sp, 80 # 16-byte Folded Reload
+ vst $vr1, $sp, 80
+ vld $vr1, $sp, 64 # 16-byte Folded Reload
 vpackev.b $vr0, $vr0, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 35
 move $a3, $fp
@@ -718,12 +718,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_6)
 pcalau12i $a0, %pc_hi20(.LCPI2_7)
 vld $vr1, $a0, %pc_lo12(.LCPI2_7)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 vrepli.w $vr0, 32
 vpackev.b $vr0, $vr1, $vr0
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 41
 move $a3, $fp
@@ -735,11 +735,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_9)
 pcalau12i $a0, %pc_hi20(.LCPI2_10)
 vld $vr2, $a0, %pc_lo12(.LCPI2_10)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 vpackev.b $vr0, $vr2, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 47
 move $a3, $fp
@@ -751,11 +751,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_12)
 pcalau12i $a0, %pc_hi20(.LCPI2_13)
 vld $vr2, $a0, %pc_lo12(.LCPI2_13)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 vpackev.b $vr0, $vr2, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 53
 move $a3, $fp
@@ -765,11 +765,11 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_14)
 pcalau12i $a0, %pc_hi20(.LCPI2_15)
 vld $vr1, $a0, %pc_lo12(.LCPI2_15)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 vpackev.b $vr0, $vr1, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 59
 move $a3, $fp
@@ -777,16 +777,16 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 vld $vr0, $a0, %pc_lo12(.LCPI2_16)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 pcalau12i $a0, %pc_hi20(.LCPI2_17)
 vld $vr0, $a0, %pc_lo12(.LCPI2_17)
 lu12i.w $a0, 1
 ori $a0, $a0, 514
 vreplgr2vr.h $vr1, $a0
 vpackev.b $vr0, $vr0, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 65
 move $a3, $fp
@@ -796,12 +796,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_18)
 pcalau12i $a0, %pc_hi20(.LCPI2_19)
 vld $vr1, $a0, %pc_lo12(.LCPI2_19)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 vrepli.h $vr0, 4
 vpackev.b $vr0, $vr0, $vr1
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0, $sp, 80
+ addi.d $a1, $sp, 96
 ori $a2, $zero, 16
 ori $a4, $zero, 71
 move $a3, $fp
@@ -811,29 +811,27 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_20)
 pcalau12i $a0, %pc_hi20(.LCPI2_21)
 vld $vr1, $a0, %pc_lo12(.LCPI2_21)
- vst $vr0, $sp, 96
+ vst $vr0, $sp, 80
 lu12i.w $a0, -2048
 vreplgr2vr.w $vr0, $a0
 vpackev.b $vr0, $vr1, $vr0
- vst $vr0, $sp, 112
- addi.d $a0, $sp, 96
- addi.d $a1, $sp, 112
+ vst $vr0, $sp, 96
+ addi.d $a0,
$sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 77 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 96 + vldi $vr0, -2305 + vst $vr0, $sp, 80 vrepli.b $vr0, -1 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 48 # 16-byte Folded Spill + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 83 move $a3, $fp @@ -843,12 +841,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_22) pcalau12i $a0, %pc_hi20(.LCPI2_23) vld $vr1, $a0, %pc_lo12(.LCPI2_23) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 89 move $a3, $fp @@ -858,24 +856,24 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_24) pcalau12i $a0, %pc_hi20(.LCPI2_25) vld $vr1, $a0, %pc_lo12(.LCPI2_25) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 95 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackev.h $vr0, $vr0, $vr0 - vst $vr0, $sp, 32 # 16-byte Folded Spill - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 16 # 16-byte Folded Spill + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 101 move $a3, $fp @@ -885,11 +883,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_26) pcalau12i $a0, %pc_hi20(.LCPI2_27) vld $vr1, $a0, %pc_lo12(.LCPI2_27) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.h $vr0, $vr1, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 107 move $a3, $fp @@ -901,23 +899,23 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_29) pcalau12i $a0, %pc_hi20(.LCPI2_30) vld $vr2, $a0, %pc_lo12(.LCPI2_30) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.h $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 113 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 vrepli.h $vr0, 2 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.w $vr0, 2 vpackev.h $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 119 move $a3, $fp @@ -926,24 +924,24 @@ main: # @main lu12i.w $a0, 2 ori $a0, $a0, 128 vreplgr2vr.h $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vreplgr2vr.w $vr0, $a0 vpackev.h $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori 
$a4, $zero, 125 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 vrepli.d $vr0, 11 - vst $vr0, $sp, 96 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 131 move $a3, $fp @@ -951,13 +949,13 @@ main: # @main jirl $ra, $ra, 0 lu12i.w $a0, 432 vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.d $vr0, 27 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 137 move $a3, $fp @@ -965,13 +963,13 @@ main: # @main jirl $ra, $ra, 0 lu12i.w $a0, -16 vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 96 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 143 move $a3, $fp @@ -981,12 +979,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_31) pcalau12i $a0, %pc_hi20(.LCPI2_32) vld $vr1, $a0, %pc_lo12(.LCPI2_32) - vst $vr0, $sp, 96 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload vpackev.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 149 move $a3, $fp @@ -996,11 +994,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_33) pcalau12i $a0, %pc_hi20(.LCPI2_34) vld $vr1, $a0, %pc_lo12(.LCPI2_34) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.h $vr0, $vr1, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 155 move $a3, $fp @@ -1010,50 +1008,47 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_35) pcalau12i $a0, %pc_hi20(.LCPI2_36) vld $vr1, $a0, %pc_lo12(.LCPI2_36) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackev.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 161 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $sp, 96 - vld $vr0, $sp, 32 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 167 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackev.w $vr1, $vr0, $vr0 - vst $vr1, $sp, 32 # 16-byte Folded Spill - vst $vr1, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr1, $sp, 16 # 16-byte Folded Spill 
+ vst $vr1, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 173 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - vreplgr2vr.d $vr1, $a0 - vst $vr1, $sp, 16 # 16-byte Folded Spill - vst $vr1, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vpackev.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vldi $vr0, -1777 + vst $vr0, $sp, 80 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vpackev.w $vr0, $vr1, $vr0 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 179 move $a3, $fp @@ -1063,11 +1058,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_37) pcalau12i $a0, %pc_hi20(.LCPI2_38) vld $vr1, $a0, %pc_lo12(.LCPI2_38) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.w $vr0, $vr1, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 185 move $a3, $fp @@ -1079,11 +1074,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_40) pcalau12i $a0, %pc_hi20(.LCPI2_41) vld $vr2, $a0, %pc_lo12(.LCPI2_41) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.w $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 191 move $a3, $fp @@ -1091,36 +1086,36 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_42) vld $vr0, $a0, %pc_lo12(.LCPI2_42) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 80 vpackev.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 197 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 16 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vld $vr1, $sp, 80 # 16-byte Folded Reload + vldi $vr0, -1777 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 203 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $sp, 96 - vld $vr0, $sp, 32 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 209 move $a3, $fp @@ -1130,12 +1125,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_43) pcalau12i $a0, %pc_hi20(.LCPI2_44) vld $vr1, $a0, %pc_lo12(.LCPI2_44) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackev.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 215 move $a3, $fp @@ -1147,11 +1142,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_46) pcalau12i $a0, %pc_hi20(.LCPI2_47) vld $vr2, $a0, %pc_lo12(.LCPI2_47) - vst $vr0, $sp, 96 + vst $vr0, $sp, 
80 vpackev.w $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 221 move $a3, $fp @@ -1161,24 +1156,24 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_48) pcalau12i $a0, %pc_hi20(.LCPI2_49) vld $vr1, $a0, %pc_lo12(.LCPI2_49) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackev.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 227 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackev.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 233 move $a3, $fp @@ -1188,39 +1183,37 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_50) pcalau12i $a0, %pc_hi20(.LCPI2_51) vld $vr1, $a0, %pc_lo12(.LCPI2_51) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 lu52i.d $a0, $zero, -8 vreplgr2vr.d $vr0, $a0 vpackev.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 239 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 245 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - pcalau12i $a1, %pc_hi20(.LCPI2_52) - vld $vr0, $a1, %pc_lo12(.LCPI2_52) - lu32i.d $a0, -1 - vreplgr2vr.d $vr1, $a0 - vst $vr1, $sp, 96 + pcalau12i $a0, %pc_hi20(.LCPI2_52) + vld $vr0, $a0, %pc_lo12(.LCPI2_52) + vldi $vr1, -1552 + vst $vr1, $sp, 80 vpackev.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 251 move $a3, $fp @@ -1228,13 +1221,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_53) vld $vr0, $a0, %pc_lo12(.LCPI2_53) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.b $vr0, -29 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 257 move $a3, $fp @@ -1242,14 +1235,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_54) vld $vr0, $a0, %pc_lo12(.LCPI2_54) - vst $vr0, $sp, 96 - lu12i.w $a0, 260096 - vreplgr2vr.w $vr0, $a0 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vldi $vr0, -1424 + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackev.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 263 
move $a3, $fp @@ -1261,22 +1253,22 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_56) pcalau12i $a0, %pc_hi20(.LCPI2_57) vld $vr2, $a0, %pc_lo12(.LCPI2_57) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.d $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 269 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 vrepli.h $vr0, 1 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 275 move $a3, $fp @@ -1286,12 +1278,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_58) pcalau12i $a0, %pc_hi20(.LCPI2_59) vld $vr1, $a0, %pc_lo12(.LCPI2_59) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackev.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 281 move $a3, $fp @@ -1301,12 +1293,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_60) pcalau12i $a0, %pc_hi20(.LCPI2_61) vld $vr1, $a0, %pc_lo12(.LCPI2_61) - vst $vr0, $sp, 96 - vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 32 # 16-byte Folded Reload vpackev.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 287 move $a3, $fp @@ -1318,11 +1310,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_63) pcalau12i $a0, %pc_hi20(.LCPI2_64) vld $vr2, $a0, %pc_lo12(.LCPI2_64) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackev.d $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 293 move $a3, $fp @@ -1335,31 +1327,31 @@ main: # @main vld $vr0, $a1, %pc_lo12(.LCPI2_65) lu52i.d $a0, $a0, 32 vreplgr2vr.d $vr1, $a0 - vst $vr1, $sp, 96 + vst $vr1, $sp, 80 vpackev.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 299 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 305 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $fp, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 136 # 8-byte Folded Reload - addi.d $sp, $sp, 144 + ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $ra, $sp, 120 # 8-byte Folded Reload + addi.d $sp, $sp, 128 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackod.dir/lsx-vpackod.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackod.dir/lsx-vpackod.s index 26c7b475..888d9007 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackod.dir/lsx-vpackod.s +++ 
b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpackod.dir/lsx-vpackod.s @@ -649,33 +649,33 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -160 - st.d $ra, $sp, 152 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill - st.d $s0, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -144 + st.d $ra, $sp, 136 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill + st.d $s0, $sp, 120 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_0) vld $vr0, $a0, %pc_lo12(.LCPI2_0) vrepli.b $vr1, 0 - vst $vr1, $sp, 80 # 16-byte Folded Spill - vst $vr1, $sp, 96 + vst $vr1, $sp, 64 # 16-byte Folded Spill + vst $vr1, $sp, 80 vpackod.b $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 + vst $vr0, $sp, 96 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $fp, $a0, %pc_lo12(.L.str.5) - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 23 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackod.b $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 29 move $a3, $fp @@ -685,23 +685,23 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_1) pcalau12i $a0, %pc_hi20(.LCPI2_2) vld $vr1, $a0, %pc_lo12(.LCPI2_2) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 35 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 41 move $a3, $fp @@ -711,12 +711,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_3) pcalau12i $a0, %pc_hi20(.LCPI2_4) vld $vr1, $a0, %pc_lo12(.LCPI2_4) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 47 move $a3, $fp @@ -728,11 +728,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_6) pcalau12i $a0, %pc_hi20(.LCPI2_7) vld $vr2, $a0, %pc_lo12(.LCPI2_7) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.b $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 53 move $a3, $fp @@ -742,15 +742,15 @@ main: # @main ori $a0, $zero, 0 lu32i.d $a0, 256 vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 ori $a0, $zero, 252 lu32i.d $a0, 256 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - 
addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 59 move $a3, $fp @@ -760,14 +760,14 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_8) pcalau12i $a0, %pc_hi20(.LCPI2_9) vld $vr2, $a0, %pc_lo12(.LCPI2_9) - vst $vr2, $sp, 32 # 16-byte Folded Spill + vst $vr2, $sp, 16 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_10) vld $vr1, $a0, %pc_lo12(.LCPI2_10) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.b $vr0, $vr1, $vr2 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 65 move $a3, $fp @@ -777,12 +777,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_11) pcalau12i $a0, %pc_hi20(.LCPI2_12) vld $vr1, $a0, %pc_lo12(.LCPI2_12) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 71 move $a3, $fp @@ -791,15 +791,15 @@ main: # @main lu12i.w $a0, 3280 ori $a0, $a0, 99 vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 lu12i.w $a0, -207306 ori $a0, $a0, 867 vreplgr2vr.w $vr0, $a0 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.b $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 77 move $a3, $fp @@ -809,12 +809,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_13) pcalau12i $a0, %pc_hi20(.LCPI2_14) vld $vr1, $a0, %pc_lo12(.LCPI2_14) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 83 move $a3, $fp @@ -824,12 +824,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_15) pcalau12i $a0, %pc_hi20(.LCPI2_16) vld $vr1, $a0, %pc_lo12(.LCPI2_16) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.d $vr0, 64 vpackod.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 89 move $a3, $fp @@ -837,12 +837,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_17) vld $vr0, $a0, %pc_lo12(.LCPI2_17) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 80 vpackod.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 95 move $a3, $fp @@ -850,53 +850,50 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_18) vld $vr0, $a0, %pc_lo12(.LCPI2_18) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 80 vpackod.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 101 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - 
ori $a0, $a0, 4095 - pcalau12i $a1, %pc_hi20(.LCPI2_19) - vld $vr0, $a1, %pc_lo12(.LCPI2_19) - vreplgr2vr.w $vr1, $a0 - vst $vr1, $sp, 16 # 16-byte Folded Spill - vst $vr1, $sp, 96 + pcalau12i $a0, %pc_hi20(.LCPI2_19) + vld $vr0, $a0, %pc_lo12(.LCPI2_19) + vldi $vr1, -2305 + vst $vr1, $sp, 80 vrepli.b $vr1, -1 - vst $vr1, $sp, 48 # 16-byte Folded Spill + vst $vr1, $sp, 32 # 16-byte Folded Spill vpackod.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 107 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackod.h $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 113 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 119 move $a3, $fp @@ -908,11 +905,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_21) pcalau12i $a0, %pc_hi20(.LCPI2_22) vld $vr2, $a0, %pc_lo12(.LCPI2_22) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.h $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 125 move $a3, $fp @@ -921,13 +918,13 @@ main: # @main lu12i.w $a0, 4 ori $a0, $a0, 3968 vreplgr2vr.h $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 lu12i.w $a0, 325632 vreplgr2vr.w $vr0, $a0 vpackod.h $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 131 move $a3, $fp @@ -939,11 +936,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_24) pcalau12i $a0, %pc_hi20(.LCPI2_25) vld $vr2, $a0, %pc_lo12(.LCPI2_25) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.h $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 137 move $a3, $fp @@ -953,12 +950,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_26) pcalau12i $a0, %pc_hi20(.LCPI2_27) vld $vr1, $a0, %pc_lo12(.LCPI2_27) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 143 move $a3, $fp @@ -968,12 +965,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_28) pcalau12i $a0, %pc_hi20(.LCPI2_29) vld $vr1, $a0, %pc_lo12(.LCPI2_29) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d 
$a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 149 move $a3, $fp @@ -982,13 +979,13 @@ main: # @main lu12i.w $a0, 495 ori $a0, $a0, 4014 vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.h $vr0, -82 vrepli.h $vr1, 30 vpackod.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 155 move $a3, $fp @@ -998,11 +995,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_30) pcalau12i $a0, %pc_hi20(.LCPI2_31) vld $vr1, $a0, %pc_lo12(.LCPI2_31) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.h $vr0, $vr1, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 161 move $a3, $fp @@ -1014,11 +1011,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_33) pcalau12i $a0, %pc_hi20(.LCPI2_34) vld $vr2, $a0, %pc_lo12(.LCPI2_34) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.h $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 167 move $a3, $fp @@ -1028,12 +1025,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_35) pcalau12i $a0, %pc_hi20(.LCPI2_36) vld $vr1, $a0, %pc_lo12(.LCPI2_36) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 173 move $a3, $fp @@ -1043,37 +1040,37 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_37) pcalau12i $a0, %pc_hi20(.LCPI2_38) vld $vr1, $a0, %pc_lo12(.LCPI2_38) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.w $vr0, $vr1, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 179 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 vrepli.d $vr0, 1 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 addi.w $a0, $zero, -2 lu32i.d $a0, 1 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 185 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackod.w $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 191 move $a3, $fp @@ -1083,12 +1080,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_39) pcalau12i $a0, %pc_hi20(.LCPI2_40) vld $vr1, $a0, %pc_lo12(.LCPI2_40) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 197 move $a3, $fp @@ -1098,12 +1095,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_41) pcalau12i $a0, 
%pc_hi20(.LCPI2_42) vld $vr1, $a0, %pc_lo12(.LCPI2_42) - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 203 move $a3, $fp @@ -1115,26 +1112,24 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_44) pcalau12i $a0, %pc_hi20(.LCPI2_45) vld $vr2, $a0, %pc_lo12(.LCPI2_45) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.w $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 209 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -1 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 96 - vld $vr0, $sp, 48 # 16-byte Folded Reload - vld $vr1, $sp, 80 # 16-byte Folded Reload + vldi $vr0, -1552 + vst $vr0, $sp, 80 + vld $vr0, $sp, 32 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 215 move $a3, $fp @@ -1143,16 +1138,16 @@ main: # @main lu12i.w $a0, 16448 ori $a1, $a0, 1284 vreplgr2vr.d $vr0, $a1 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 ori $a0, $a0, 1028 lu32i.d $a0, 263428 lu52i.d $a0, $a0, 64 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 221 move $a3, $fp @@ -1164,11 +1159,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_47) pcalau12i $a0, %pc_hi20(.LCPI2_48) vld $vr2, $a0, %pc_lo12(.LCPI2_48) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.w $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 227 move $a3, $fp @@ -1180,11 +1175,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_50) pcalau12i $a0, %pc_hi20(.LCPI2_51) vld $vr2, $a0, %pc_lo12(.LCPI2_51) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.w $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 233 move $a3, $fp @@ -1194,12 +1189,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_52) pcalau12i $a0, %pc_hi20(.LCPI2_53) vld $vr1, $a0, %pc_lo12(.LCPI2_53) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.h $vr0, 1 vpackod.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 239 move $a3, $fp @@ -1208,12 +1203,12 @@ main: # @main ori $a0, $zero, 0 lu32i.d $a0, 32768 vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 96 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 245 move $a3, $fp @@ -1222,25 +1217,25 @@ main: # @main ori $a0, $zero, 
0 lu32i.d $a0, -65538 vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vrepli.h $vr0, -2 - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 251 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vpackod.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 257 move $a3, $fp @@ -1248,16 +1243,16 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_54) vld $vr0, $a0, %pc_lo12(.LCPI2_54) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 pcalau12i $a0, %pc_hi20(.LCPI2_55) vld $vr0, $a0, %pc_lo12(.LCPI2_55) lu12i.w $a0, -130044 ori $a0, $a0, 65 vreplgr2vr.w $vr1, $a0 vpackod.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 263 move $a3, $fp @@ -1265,39 +1260,39 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_56) vld $vr0, $a0, %pc_lo12(.LCPI2_56) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 lu32i.d $s0, 16 lu52i.d $a0, $s0, 1039 vreplgr2vr.d $vr0, $a0 vrepli.h $vr1, -4 vpackod.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 269 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 275 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 16 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 32 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 48 # 16-byte Folded Reload - vld $vr1, $sp, 80 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload vpackod.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 281 move $a3, $fp @@ -1306,33 +1301,33 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_57) vld $vr0, $a0, %pc_lo12(.LCPI2_57) vrepli.w $vr1, 32 - vst $vr1, $sp, 96 + vst $vr1, $sp, 80 vpackod.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 287 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 293 move $a3, 
$fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 299 move $a3, $fp @@ -1344,11 +1339,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_59) pcalau12i $a0, %pc_hi20(.LCPI2_60) vld $vr2, $a0, %pc_lo12(.LCPI2_60) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vpackod.d $vr0, $vr2, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 305 move $a3, $fp @@ -1358,22 +1353,22 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_61) pcalau12i $a0, %pc_hi20(.LCPI2_62) vld $vr1, $a0, %pc_lo12(.LCPI2_62) - vst $vr0, $sp, 96 - vld $vr0, $sp, 16 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vldi $vr0, -2305 vpackod.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 311 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + ld.d $s0, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $ra, $sp, 136 # 8-byte Folded Reload + addi.d $sp, $sp, 144 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickev.dir/lsx-vpickev.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickev.dir/lsx-vpickev.s index 3b9dd946..d2c70d21 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickev.dir/lsx-vpickev.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickev.dir/lsx-vpickev.s @@ -483,12 +483,10 @@ main: # @main addi.d $sp, $sp, -128 st.d $ra, $sp, 120 # 8-byte Folded Spill st.d $fp, $sp, 112 # 8-byte Folded Spill - addi.w $a0, $zero, -1 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - vld $vr1, $a1, %pc_lo12(.LCPI2_0) + pcalau12i $a0, %pc_hi20(.LCPI2_0) + vld $vr1, $a0, %pc_lo12(.LCPI2_0) vst $vr1, $sp, 32 # 16-byte Folded Spill - lu32i.d $a0, 0 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1777 vst $vr0, $sp, 80 vpickev.b $vr0, $vr1, $vr1 vst $vr0, $sp, 96 @@ -872,13 +870,11 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_35) vld $vr0, $a0, %pc_lo12(.LCPI2_35) vst $vr0, $sp, 80 - lu12i.w $a0, -16 - lu32i.d $a0, 0 - vreplgr2vr.d $vr0, $a0 lu12i.w $a0, 14 ori $a0, $a0, 3578 - vreplgr2vr.h $vr1, $a0 - vpickev.d $vr0, $vr1, $vr0 + vreplgr2vr.h $vr0, $a0 + vldi $vr1, -1780 + vpickev.d $vr0, $vr0, $vr1 vst $vr0, $sp, 96 addi.d $a0, $sp, 80 addi.d $a1, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickod.dir/lsx-vpickod.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickod.dir/lsx-vpickod.s index f98c19f9..a17c2969 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickod.dir/lsx-vpickod.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickod.dir/lsx-vpickod.s @@ -595,10 +595,9 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, 
$sp, -144 - st.d $ra, $sp, 136 # 8-byte Folded Spill - st.d $fp, $sp, 128 # 8-byte Folded Spill - st.d $s0, $sp, 120 # 8-byte Folded Spill + addi.d $sp, $sp, -128 + st.d $ra, $sp, 120 # 8-byte Folded Spill + st.d $fp, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_0) vld $vr0, $a0, %pc_lo12(.LCPI2_0) vst $vr0, $sp, 32 # 16-byte Folded Spill @@ -711,10 +710,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_15) vld $vr0, $a0, %pc_lo12(.LCPI2_15) vst $vr0, $sp, 80 - ori $s0, $zero, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -1 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1552 vld $vr1, $sp, 64 # 16-byte Folded Reload vpickod.b $vr0, $vr0, $vr1 vst $vr0, $sp, 96 @@ -782,10 +778,11 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_23) - vld $vr0, $a0, %pc_lo12(.LCPI2_23) - lu32i.d $s0, 51714 - vreplgr2vr.d $vr1, $s0 + ori $a0, $zero, 0 + pcalau12i $a1, %pc_hi20(.LCPI2_23) + vld $vr0, $a1, %pc_lo12(.LCPI2_23) + lu32i.d $a0, 51714 + vreplgr2vr.d $vr1, $a0 vst $vr1, $sp, 80 vpickod.h $vr0, $vr0, $vr0 vst $vr0, $sp, 96 @@ -1101,10 +1098,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $fp, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 136 # 8-byte Folded Reload - addi.d $sp, $sp, 144 + ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $ra, $sp, 120 # 8-byte Folded Reload + addi.d $sp, $sp, 128 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickve2gr.dir/lsx-vpickve2gr.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickve2gr.dir/lsx-vpickve2gr.s index 6406f4a4..5d29df5e 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickve2gr.dir/lsx-vpickve2gr.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vpickve2gr.dir/lsx-vpickve2gr.s @@ -411,7 +411,7 @@ main: # @main jirl $ra, $ra, 0 ori $a0, $zero, 1024 st.w $a0, $sp, 64 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2812 vpickve2gr.hu $a0, $vr0, 5 st.w $a0, $sp, 68 addi.d $a0, $sp, 64 @@ -586,9 +586,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 lu12i.w $a0, 260096 - lu52i.d $a1, $a0, 1016 - st.d $a1, $sp, 48 - vreplgr2vr.w $vr0, $a0 + lu52i.d $a0, $a0, 1016 + st.d $a0, $sp, 48 + vldi $vr0, -1424 vpickve2gr.du $a0, $vr0, 0 st.d $a0, $sp, 56 addi.d $a0, $sp, 48 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplgr2vr.dir/lsx-vreplgr2vr.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplgr2vr.dir/lsx-vreplgr2vr.s index ee4b1244..51ed12ad 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplgr2vr.dir/lsx-vreplgr2vr.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplgr2vr.dir/lsx-vreplgr2vr.s @@ -417,8 +417,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vreplgr2vr.h $vr0, $s0 + vldi $vr0, -2812 vst $vr0, $sp, 80 + vreplgr2vr.h $vr0, $s0 vst $vr0, $sp, 96 addi.d $a0, $sp, 80 addi.d $a1, $sp, 96 @@ -560,9 +561,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 260096 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -1424 vst $vr0, $sp, 80 + lu12i.w $a0, 260096 lu52i.d $a0, $a0, 1016 vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 96 diff --git 
a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplve.dir/lsx-vreplve.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplve.dir/lsx-vreplve.s index 49901f76..82424261 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplve.dir/lsx-vreplve.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplve.dir/lsx-vreplve.s @@ -299,17 +299,15 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -128 - st.d $ra, $sp, 120 # 8-byte Folded Spill - st.d $fp, $sp, 112 # 8-byte Folded Spill - st.d $s0, $sp, 104 # 8-byte Folded Spill - st.d $s1, $sp, 96 # 8-byte Folded Spill - st.d $s2, $sp, 88 # 8-byte Folded Spill + addi.d $sp, $sp, -112 + st.d $ra, $sp, 104 # 8-byte Folded Spill + st.d $fp, $sp, 96 # 8-byte Folded Spill + st.d $s0, $sp, 88 # 8-byte Folded Spill + st.d $s1, $sp, 80 # 8-byte Folded Spill vrepli.b $vr0, 0 vst $vr0, $sp, 32 # 16-byte Folded Spill - lu12i.w $s2, -524288 vst $vr0, $sp, 48 - move $a0, $s2 + lu12i.w $a0, -524288 lu32i.d $a0, 0 vreplgr2vr.d $vr0, $a0 vreplve.b $vr0, $vr0, $zero @@ -388,7 +386,7 @@ main: # @main jirl $ra, $ra, 0 vrepli.b $vr0, -128 vst $vr0, $sp, 48 - vreplgr2vr.w $vr0, $s2 + vldi $vr0, -3200 vreplve.b $vr0, $vr0, $s0 vst $vr0, $sp, 64 addi.d $a0, $sp, 48 @@ -512,10 +510,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -4096 - pcalau12i $a1, %pc_hi20(.LCPI2_4) - vld $vr0, $a1, %pc_lo12(.LCPI2_4) - vreplgr2vr.w $vr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_4) + vld $vr0, $a0, %pc_lo12(.LCPI2_4) + vldi $vr1, -3073 vst $vr1, $sp, 48 ori $a0, $zero, 1024 vreplve.w $vr0, $vr0, $a0 @@ -591,11 +588,7 @@ main: # @main jirl $ra, $ra, 0 vrepli.h $vr0, -256 vst $vr0, $sp, 48 - lu12i.w $a0, -4081 - ori $a0, $a0, 3840 - lu32i.d $a0, -256 - lu52i.d $a0, $a0, 15 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1686 vreplve.w $vr0, $vr0, $zero vst $vr0, $sp, 64 addi.d $a0, $sp, 48 @@ -763,12 +756,11 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s2, $sp, 88 # 8-byte Folded Reload - ld.d $s1, $sp, 96 # 8-byte Folded Reload - ld.d $s0, $sp, 104 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - addi.d $sp, $sp, 128 + ld.d $s1, $sp, 80 # 8-byte Folded Reload + ld.d $s0, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload + ld.d $ra, $sp, 104 # 8-byte Folded Reload + addi.d $sp, $sp, 112 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplvei.dir/lsx-vreplvei.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplvei.dir/lsx-vreplvei.s index f009788c..99f07625 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplvei.dir/lsx-vreplvei.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vreplvei.dir/lsx-vreplvei.s @@ -491,8 +491,7 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_7) vld $vr0, $a0, %pc_lo12(.LCPI2_7) - lu12i.w $a0, 8 - vreplgr2vr.h $vr1, $a0 + vldi $vr1, -2688 vst $vr1, $sp, 80 vreplvei.h $vr0, $vr0, 4 vst $vr0, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotr.dir/lsx-vrotr.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotr.dir/lsx-vrotr.s index c885f5a2..21668221 100644 --- 
a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotr.dir/lsx-vrotr.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotr.dir/lsx-vrotr.s @@ -804,13 +804,11 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_24) vld $vr0, $a0, %pc_lo12(.LCPI2_24) - vst $vr0, $sp, 96 pcalau12i $a0, %pc_hi20(.LCPI2_25) - vld $vr0, $a0, %pc_lo12(.LCPI2_25) - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - vreplgr2vr.w $vr1, $a0 - vrotr.h $vr0, $vr0, $vr1 + vld $vr1, $a0, %pc_lo12(.LCPI2_25) + vst $vr0, $sp, 96 + vldi $vr0, -2305 + vrotr.h $vr0, $vr1, $vr0 vst $vr0, $sp, 112 addi.d $a0, $sp, 96 addi.d $a1, $sp, 112 @@ -898,9 +896,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 9 - ori $a0, $a0, 3840 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2657 vst $vr0, $sp, 96 lu12i.w $a0, 4 ori $a0, $a0, 3968 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotri.dir/lsx-vrotri.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotri.dir/lsx-vrotri.s index af85c8ac..01c346bc 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotri.dir/lsx-vrotri.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vrotri.dir/lsx-vrotri.s @@ -410,11 +410,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 32768 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3320 vst $vr0, $sp, 48 - lu12i.w $a0, -524288 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3200 vrotri.b $vr0, $vr0, 4 vst $vr0, $sp, 64 addi.d $a0, $sp, 48 @@ -600,9 +598,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -65536 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1600 vst $vr0, $sp, 48 vrotri.h $vr0, $vr0, 10 vst $vr0, $sp, 64 @@ -613,8 +609,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 32 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3582 vst $vr0, $sp, 48 vrepli.w $vr0, 8 vrotri.w $vr0, $vr0, 18 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-1.dir/lsx-vsadd-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-1.dir/lsx-vsadd-1.s index cfe00347..ec7fb737 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-1.dir/lsx-vsadd-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-1.dir/lsx-vsadd-1.s @@ -436,9 +436,7 @@ main: # @main addi.d $sp, $sp, -96 st.d $ra, $sp, 88 # 8-byte Folded Spill st.d $fp, $sp, 80 # 8-byte Folded Spill - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1777 vst $vr0, $sp, 48 vrepli.b $vr1, 0 vst $vr1, $sp, 32 # 16-byte Folded Spill @@ -714,16 +712,16 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 lu12i.w $a0, 260096 - vreplgr2vr.w $vr0, $a0 lu32i.d $a0, -256 lu52i.d $a0, $a0, 2039 - vreplgr2vr.d $vr1, $a0 - vst $vr1, $sp, 48 + vreplgr2vr.d $vr0, $a0 + vst $vr0, $sp, 48 ori $a0, $zero, 0 lu32i.d $a0, -256 lu52i.d $a0, $a0, 1023 - vreplgr2vr.d $vr1, $a0 - vsadd.w $vr0, $vr0, $vr1 + vreplgr2vr.d $vr0, $a0 + vldi $vr1, -1424 + vsadd.w $vr0, $vr1, $vr0 vst $vr0, $sp, 64 addi.d $a0, $sp, 48 addi.d $a1, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-2.dir/lsx-vsadd-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-2.dir/lsx-vsadd-2.s 
index 4dade1db..7455d845 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-2.dir/lsx-vsadd-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsadd-2.dir/lsx-vsadd-2.s @@ -974,8 +974,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 2048 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2808 vst $vr0, $sp, 64 vld $vr1, $sp, 48 # 16-byte Folded Reload vsadd.wu $vr0, $vr1, $vr0 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-1.dir/lsx-vsat-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-1.dir/lsx-vsat-1.s index 947756f4..fd751c9b 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-1.dir/lsx-vsat-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-1.dir/lsx-vsat-1.s @@ -577,8 +577,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 1008 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3521 vst $vr0, $sp, 48 lu12i.w $a0, 524272 vreplgr2vr.w $vr0, $a0 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-2.dir/lsx-vsat-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-2.dir/lsx-vsat-2.s index 783e61bf..d99de29f 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-2.dir/lsx-vsat-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsat-2.dir/lsx-vsat-2.s @@ -417,10 +417,9 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -112 - st.d $ra, $sp, 104 # 8-byte Folded Spill - st.d $fp, $sp, 96 # 8-byte Folded Spill - st.d $s0, $sp, 88 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_0) vld $vr0, $a0, %pc_lo12(.LCPI2_0) pcalau12i $a0, %pc_hi20(.LCPI2_1) @@ -685,11 +684,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 3 - pcalau12i $a1, %pc_hi20(.LCPI2_18) - vld $vr0, $a1, %pc_lo12(.LCPI2_18) - ori $a0, $a0, 4095 - vreplgr2vr.w $vr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_18) + vld $vr0, $a0, %pc_lo12(.LCPI2_18) + vldi $vr1, -2497 vst $vr1, $sp, 48 vsat.wu $vr0, $vr0, 13 vst $vr0, $sp, 64 @@ -747,8 +744,8 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $s0, $zero, -1 - lu52i.d $a0, $s0, 2047 + addi.w $a0, $zero, -1 + lu52i.d $a0, $a0, 2047 vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 48 vrepli.b $vr0, -1 @@ -865,8 +862,7 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_26) vld $vr0, $a0, %pc_lo12(.LCPI2_26) - lu32i.d $s0, 65535 - vreplgr2vr.d $vr1, $s0 + vldi $vr1, -1729 vst $vr1, $sp, 48 vsat.du $vr0, $vr0, 47 vst $vr0, $sp, 64 @@ -878,10 +874,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 88 # 8-byte Folded Reload - ld.d $fp, $sp, 96 # 8-byte Folded Reload - ld.d $ra, $sp, 104 # 8-byte Folded Reload - addi.d $sp, $sp, 112 + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseq.dir/lsx-vseq.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseq.dir/lsx-vseq.s index 
de4cabdd..e1406ccb 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseq.dir/lsx-vseq.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseq.dir/lsx-vseq.s @@ -672,21 +672,20 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -192 - st.d $ra, $sp, 184 # 8-byte Folded Spill - st.d $fp, $sp, 176 # 8-byte Folded Spill - st.d $s0, $sp, 168 # 8-byte Folded Spill + addi.d $sp, $sp, -160 + st.d $ra, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 144 # 8-byte Folded Spill vrepli.b $vr0, -1 - vst $vr0, $sp, 96 # 16-byte Folded Spill - vst $vr0, $sp, 128 + vst $vr0, $sp, 80 # 16-byte Folded Spill + vst $vr0, $sp, 112 vrepli.b $vr0, 0 - vst $vr0, $sp, 112 # 16-byte Folded Spill + vst $vr0, $sp, 96 # 16-byte Folded Spill vseq.b $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 + vst $vr0, $sp, 128 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $fp, $a0, %pc_lo12(.L.str.5) - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 23 move $a3, $fp @@ -696,12 +695,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_0) pcalau12i $a0, %pc_hi20(.LCPI2_1) vld $vr1, $a0, %pc_lo12(.LCPI2_1) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.b $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 29 move $a3, $fp @@ -709,40 +708,40 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_2) vld $vr0, $a0, %pc_lo12(.LCPI2_2) - vld $vr1, $sp, 112 # 16-byte Folded Reload - vst $vr1, $sp, 128 vld $vr1, $sp, 96 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vld $vr1, $sp, 80 # 16-byte Folded Reload vseq.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 35 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 112 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 112 vrepli.b $vr0, -128 vrepli.w $vr1, -31 vseq.b $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 41 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 lu12i.w $a0, 8272 ori $a0, $a0, 800 vreplgr2vr.d $vr0, $a0 vseq.b $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 47 move $a3, $fp @@ -754,11 +753,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_4) pcalau12i $a0, %pc_hi20(.LCPI2_5) vld $vr2, $a0, %pc_lo12(.LCPI2_5) - vst $vr0, $sp, 128 + vst $vr0, $sp, 112 vseq.b $vr0, $vr2, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 53 move $a3, $fp @@ -770,11 +769,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_7) pcalau12i $a0, %pc_hi20(.LCPI2_8) vld $vr2, $a0, %pc_lo12(.LCPI2_8) - vst $vr0, $sp, 128 + vst $vr0, $sp, 112 
vseq.b $vr0, $vr2, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 59 move $a3, $fp @@ -784,30 +783,27 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_9) pcalau12i $a0, %pc_hi20(.LCPI2_10) vld $vr1, $a0, %pc_lo12(.LCPI2_10) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.b $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 65 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - lu32i.d $a0, -65281 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 128 - ori $s0, $zero, 0 + vldi $vr0, -1569 + vst $vr0, $sp, 112 ori $a0, $zero, 0 lu32i.d $a0, 32768 vreplgr2vr.d $vr0, $a0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vseq.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 71 move $a3, $fp @@ -817,12 +813,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_11) pcalau12i $a0, %pc_hi20(.LCPI2_12) vld $vr1, $a0, %pc_lo12(.LCPI2_12) - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload vseq.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 77 move $a3, $fp @@ -834,23 +830,23 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_14) pcalau12i $a0, %pc_hi20(.LCPI2_15) vld $vr2, $a0, %pc_lo12(.LCPI2_15) - vst $vr0, $sp, 128 + vst $vr0, $sp, 112 vseq.b $vr0, $vr2, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 83 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 vrepli.b $vr0, 99 vseq.b $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 89 move $a3, $fp @@ -860,12 +856,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_16) pcalau12i $a0, %pc_hi20(.LCPI2_17) vld $vr1, $a0, %pc_lo12(.LCPI2_17) - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload vseq.b $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 95 move $a3, $fp @@ -873,15 +869,15 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_18) vld $vr1, $a0, %pc_lo12(.LCPI2_18) - vst $vr1, $sp, 80 # 16-byte Folded Spill + vst $vr1, $sp, 64 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_19) vld $vr0, $a0, %pc_lo12(.LCPI2_19) - vst $vr1, $sp, 128 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vld $vr1, $sp, 96 # 16-byte Folded Reload vseq.b $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + 
addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 101 move $a3, $fp @@ -890,36 +886,32 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_20) vld $vr0, $a0, %pc_lo12(.LCPI2_20) vst $vr0, $sp, 32 # 16-byte Folded Spill - vst $vr0, $sp, 128 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vld $vr1, $sp, 112 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vseq.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 107 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 112 # 16-byte Folded Reload - vst $vr1, $sp, 128 - lu12i.w $a0, 8 - vreplgr2vr.h $vr0, $a0 - vst $vr0, $sp, 48 # 16-byte Folded Spill + vld $vr1, $sp, 96 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vldi $vr0, -2688 vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 113 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - lu32i.d $a0, 65535 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 128 + vldi $vr0, -1744 + vst $vr0, $sp, 112 lu12i.w $a0, 130048 vreplgr2vr.w $vr0, $a0 lu12i.w $a0, 16 @@ -927,22 +919,22 @@ main: # @main lu32i.d $a0, 65536 vreplgr2vr.d $vr1, $a0 vseq.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 119 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload vseq.h $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 125 move $a3, $fp @@ -955,23 +947,23 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_22) pcalau12i $a0, %pc_hi20(.LCPI2_23) vld $vr1, $a0, %pc_lo12(.LCPI2_23) - vst $vr2, $sp, 128 + vst $vr2, $sp, 112 vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 131 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 112 # 16-byte Folded Reload - vst $vr1, $sp, 128 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vld $vr0, $sp, 80 # 16-byte Folded Reload vseq.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 137 move $a3, $fp @@ -981,12 +973,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_24) pcalau12i $a0, %pc_hi20(.LCPI2_25) vld $vr1, $a0, %pc_lo12(.LCPI2_25) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 
16 ori $a4, $zero, 143 move $a3, $fp @@ -996,12 +988,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_26) pcalau12i $a0, %pc_hi20(.LCPI2_27) vld $vr1, $a0, %pc_lo12(.LCPI2_27) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 149 move $a3, $fp @@ -1011,34 +1003,34 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_28) pcalau12i $a0, %pc_hi20(.LCPI2_29) vld $vr1, $a0, %pc_lo12(.LCPI2_29) - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 155 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 128 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 161 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 128 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 167 move $a3, $fp @@ -1048,12 +1040,12 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_30) pcalau12i $a0, %pc_hi20(.LCPI2_31) vld $vr1, $a0, %pc_lo12(.LCPI2_31) - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload vseq.h $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 173 move $a3, $fp @@ -1065,11 +1057,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_33) pcalau12i $a0, %pc_hi20(.LCPI2_34) vld $vr2, $a0, %pc_lo12(.LCPI2_34) - vst $vr0, $sp, 128 + vst $vr0, $sp, 112 vseq.h $vr0, $vr2, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 179 move $a3, $fp @@ -1079,25 +1071,24 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_35) pcalau12i $a0, %pc_hi20(.LCPI2_36) vld $vr1, $a0, %pc_lo12(.LCPI2_36) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 185 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu32i.d $s0, -65536 - vreplgr2vr.d $vr0, $s0 - vst $vr0, $sp, 128 - vld $vr1, $sp, 96 # 16-byte Folded Reload + vldi $vr0, -1600 + vst $vr0, $sp, 112 + vld $vr1, $sp, 80 # 16-byte Folded Reload vseq.h $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, 
$sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 191 move $a3, $fp @@ -1105,12 +1096,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_37) vld $vr0, $a0, %pc_lo12(.LCPI2_37) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 128 + vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 112 vseq.w $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 197 move $a3, $fp @@ -1120,61 +1111,60 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_38) pcalau12i $a0, %pc_hi20(.LCPI2_39) vld $vr1, $a0, %pc_lo12(.LCPI2_39) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 203 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload vseq.w $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 209 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 vld $vr0, $sp, 16 # 16-byte Folded Reload vseq.w $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 215 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 128 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 221 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 112 # 16-byte Folded Reload - vst $vr1, $sp, 128 - ori $a0, $zero, 2048 - vreplgr2vr.h $vr0, $a0 + vld $vr1, $sp, 96 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vldi $vr0, -2808 vseq.w $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 227 move $a3, $fp @@ -1184,23 +1174,23 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_40) pcalau12i $a0, %pc_hi20(.LCPI2_41) vld $vr1, $a0, %pc_lo12(.LCPI2_41) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.w $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 233 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + 
vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 128 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 239 move $a3, $fp @@ -1212,11 +1202,11 @@ main: # @main vld $vr1, $a0, %pc_lo12(.LCPI2_43) pcalau12i $a0, %pc_hi20(.LCPI2_44) vld $vr2, $a0, %pc_lo12(.LCPI2_44) - vst $vr0, $sp, 128 + vst $vr0, $sp, 112 vseq.w $vr0, $vr2, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 245 move $a3, $fp @@ -1226,47 +1216,47 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_45) pcalau12i $a0, %pc_hi20(.LCPI2_46) vld $vr1, $a0, %pc_lo12(.LCPI2_46) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 251 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 - vld $vr0, $sp, 112 # 16-byte Folded Reload vseq.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 64 # 16-byte Folded Spill - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 257 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 128 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 263 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 + vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 128 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 269 move $a3, $fp @@ -1274,38 +1264,38 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_47) vld $vr0, $a0, %pc_lo12(.LCPI2_47) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 128 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vld $vr1, $sp, 96 # 16-byte Folded Reload vseq.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 275 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 112 vld $vr0, $sp, 32 # 16-byte Folded Reload - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vseq.d $vr0, $vr0, $vr1 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 
- addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 281 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 112 # 16-byte Folded Reload - vst $vr1, $sp, 128 - vld $vr0, $sp, 96 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vld $vr0, $sp, 80 # 16-byte Folded Reload vseq.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 287 move $a3, $fp @@ -1313,55 +1303,55 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_48) vld $vr0, $a0, %pc_lo12(.LCPI2_48) - vld $vr1, $sp, 96 # 16-byte Folded Reload - vst $vr1, $sp, 128 + vld $vr1, $sp, 80 # 16-byte Folded Reload + vst $vr1, $sp, 112 vseq.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 293 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr1, $sp, 112 # 16-byte Folded Reload - vst $vr1, $sp, 128 - vld $vr0, $sp, 48 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload + vst $vr1, $sp, 112 + vldi $vr0, -2688 vseq.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 299 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 96 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 80 # 16-byte Folded Reload + vst $vr0, $sp, 112 lu12i.w $a0, -337190 ori $a0, $a0, 3501 lu32i.d $a0, 0 vreplgr2vr.d $vr0, $a0 vseq.d $vr0, $vr0, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 305 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 112 # 16-byte Folded Reload - vst $vr0, $sp, 128 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 112 pcalau12i $a0, %pc_hi20(.LCPI2_49) vld $vr0, $a0, %pc_lo12(.LCPI2_49) lu12i.w $a0, -12337 ori $a0, $a0, 3292 vreplgr2vr.w $vr1, $a0 vseq.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 311 move $a3, $fp @@ -1371,22 +1361,21 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_50) pcalau12i $a0, %pc_hi20(.LCPI2_51) vld $vr1, $a0, %pc_lo12(.LCPI2_51) - vld $vr2, $sp, 112 # 16-byte Folded Reload - vst $vr2, $sp, 128 + vld $vr2, $sp, 96 # 16-byte Folded Reload + vst $vr2, $sp, 112 vseq.d $vr0, $vr1, $vr0 - vst $vr0, $sp, 144 - addi.d $a0, $sp, 128 - addi.d $a1, $sp, 144 + vst $vr0, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a1, $sp, 128 ori $a2, $zero, 16 ori $a4, $zero, 317 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 168 # 8-byte Folded Reload - ld.d $fp, $sp, 176 # 8-byte Folded Reload - ld.d $ra, $sp, 184 # 8-byte Folded Reload - addi.d $sp, $sp, 192 + ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 152 # 8-byte Folded Reload + addi.d $sp, $sp, 160 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseqi.dir/lsx-vseqi.s 
b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseqi.dir/lsx-vseqi.s index d1b76dff..5b805b88 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseqi.dir/lsx-vseqi.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vseqi.dir/lsx-vseqi.s @@ -452,8 +452,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 64 - lu12i.w $a0, 256 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3568 vseqi.b $vr0, $vr0, -2 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsigncov.dir/lsx-vsigncov.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsigncov.dir/lsx-vsigncov.s index 2ce8a1c5..bab763ec 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsigncov.dir/lsx-vsigncov.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsigncov.dir/lsx-vsigncov.s @@ -905,13 +905,12 @@ main: # @main lu52i.d $a0, $zero, 1016 vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 80 - lu12i.w $a0, 260096 - vreplgr2vr.w $vr0, $a0 ori $a0, $zero, 0 lu32i.d $a0, -256 lu52i.d $a0, $a0, 1023 - vreplgr2vr.d $vr1, $a0 - vsigncov.w $vr0, $vr1, $vr0 + vreplgr2vr.d $vr0, $a0 + vldi $vr1, -1424 + vsigncov.w $vr0, $vr0, $vr1 vst $vr0, $sp, 96 addi.d $a0, $sp, 80 addi.d $a1, $sp, 96 @@ -966,13 +965,11 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - move $a1, $a0 - lu32i.d $a1, 0 - vreplgr2vr.d $vr0, $a1 + vldi $vr0, -1777 vst $vr0, $sp, 80 - pcalau12i $a1, %pc_hi20(.LCPI2_28) - vld $vr0, $a1, %pc_lo12(.LCPI2_28) + pcalau12i $a0, %pc_hi20(.LCPI2_28) + vld $vr0, $a0, %pc_lo12(.LCPI2_28) + addi.w $a0, $zero, -1 lu32i.d $a0, -65536 lu52i.d $a0, $a0, 3 vreplgr2vr.d $vr1, $a0 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-1.dir/lsx-vsle-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-1.dir/lsx-vsle-1.s index cb02a013..589eabf5 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-1.dir/lsx-vsle-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-1.dir/lsx-vsle-1.s @@ -480,16 +480,12 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -128 - st.d $ra, $sp, 120 # 8-byte Folded Spill - st.d $fp, $sp, 112 # 8-byte Folded Spill - st.d $s0, $sp, 104 # 8-byte Folded Spill - addi.w $s0, $zero, -1 - move $a0, $s0 - lu32i.d $a0, -256 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - vld $vr0, $a1, %pc_lo12(.LCPI2_0) - vreplgr2vr.d $vr1, $a0 + addi.d $sp, $sp, -112 + st.d $ra, $sp, 104 # 8-byte Folded Spill + st.d $fp, $sp, 96 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(.LCPI2_0) + vld $vr0, $a0, %pc_lo12(.LCPI2_0) + vldi $vr1, -1553 vst $vr1, $sp, 64 vrepli.b $vr1, 0 vst $vr1, $sp, 32 # 16-byte Folded Spill @@ -672,8 +668,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 64 - lu12i.w $a0, 8 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2688 vsle.h $vr0, $vr0, $vr0 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 @@ -715,8 +710,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_17) vld $vr1, $a0, %pc_lo12(.LCPI2_17) vst $vr0, $sp, 64 - ori $a0, $zero, 1024 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2812 vsle.h $vr0, $vr1, $vr0 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 @@ -822,8 +816,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, 
$sp, 64 - lu52i.d $a0, $zero, 1023 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -912 vsle.w $vr0, $vr0, $vr0 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 @@ -849,10 +842,9 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 32 # 16-byte Folded Reload vst $vr0, $sp, 64 - lu12i.w $a0, 4096 - vreplgr2vr.w $vr0, $a0 - vrepli.h $vr1, 64 - vsle.d $vr0, $vr0, $vr1 + vrepli.h $vr0, 64 + vldi $vr1, -3327 + vsle.d $vr0, $vr1, $vr0 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 addi.d $a1, $sp, 80 @@ -876,8 +868,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $sp, 64 - lu32i.d $s0, 0 - vreplgr2vr.d $vr0, $s0 + vldi $vr0, -1777 vsle.d $vr0, $vr0, $vr0 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 @@ -902,10 +893,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 104 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - addi.d $sp, $sp, 128 + ld.d $fp, $sp, 96 # 8-byte Folded Reload + ld.d $ra, $sp, 104 # 8-byte Folded Reload + addi.d $sp, $sp, 112 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-2.dir/lsx-vsle-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-2.dir/lsx-vsle-2.s index 3caa1873..766f6a0c 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-2.dir/lsx-vsle-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsle-2.dir/lsx-vsle-2.s @@ -628,8 +628,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 80 # 16-byte Folded Reload vst $vr0, $sp, 96 - ori $a0, $zero, 512 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3838 vsle.bu $vr0, $vr0, $vr0 vst $vr0, $sp, 112 addi.d $a0, $sp, 96 @@ -695,13 +694,11 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -16 - pcalau12i $a1, %pc_hi20(.LCPI2_8) - vld $vr0, $a1, %pc_lo12(.LCPI2_8) - pcalau12i $a1, %pc_hi20(.LCPI2_9) - vld $vr1, $a1, %pc_lo12(.LCPI2_9) - lu32i.d $a0, 0 - vreplgr2vr.d $vr2, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_8) + vld $vr0, $a0, %pc_lo12(.LCPI2_8) + pcalau12i $a0, %pc_hi20(.LCPI2_9) + vld $vr1, $a0, %pc_lo12(.LCPI2_9) + vldi $vr2, -1780 vst $vr2, $sp, 96 vsle.hu $vr0, $vr1, $vr0 vst $vr0, $sp, 112 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-1.dir/lsx-vslei-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-1.dir/lsx-vslei-1.s index 4c0a7e14..6ab6bd13 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-1.dir/lsx-vslei-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-1.dir/lsx-vslei-1.s @@ -449,11 +449,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -4081 - pcalau12i $a1, %pc_hi20(.LCPI2_2) - vld $vr0, $a1, %pc_lo12(.LCPI2_2) - ori $a0, $a0, 4095 - vreplgr2vr.d $vr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_2) + vld $vr0, $a0, %pc_lo12(.LCPI2_2) + vldi $vr1, -1541 vst $vr1, $sp, 96 vslei.b $vr0, $vr0, 11 vst $vr0, $sp, 112 @@ -656,8 +654,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 80 # 16-byte Folded Reload vst $vr0, $sp, 96 - lu12i.w $a0, 261120 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -1416 vslei.w $vr0, $vr0, 1 vst $vr0, $sp, 112 addi.d $a0, $sp, 96 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-2.dir/lsx-vslei-2.s 
b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-2.dir/lsx-vslei-2.s index 110ccdab..6f7a8f6a 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-2.dir/lsx-vslei-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslei-2.dir/lsx-vslei-2.s @@ -492,21 +492,20 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -160 - st.d $ra, $sp, 152 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill - st.d $s0, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -128 + st.d $ra, $sp, 120 # 8-byte Folded Spill + st.d $fp, $sp, 112 # 8-byte Folded Spill vrepli.b $vr0, 0 - vst $vr0, $sp, 80 # 16-byte Folded Spill - vst $vr0, $sp, 96 - vrepli.b $vr0, -1 vst $vr0, $sp, 64 # 16-byte Folded Spill + vst $vr0, $sp, 80 + vrepli.b $vr0, -1 + vst $vr0, $sp, 48 # 16-byte Folded Spill vslei.bu $vr0, $vr0, 7 - vst $vr0, $sp, 112 + vst $vr0, $sp, 96 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $fp, $a0, %pc_lo12(.L.str.5) - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 22 move $a3, $fp @@ -514,41 +513,39 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_0) vld $vr1, $a0, %pc_lo12(.LCPI2_0) - vst $vr1, $sp, 48 # 16-byte Folded Spill + vst $vr1, $sp, 32 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_1) vld $vr0, $a0, %pc_lo12(.LCPI2_1) - vst $vr1, $sp, 96 + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 30 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 27 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - pcalau12i $a1, %pc_hi20(.LCPI2_2) - vld $vr0, $a1, %pc_lo12(.LCPI2_2) - ori $a0, $a0, 3840 - vreplgr2vr.d $vr1, $a0 - vst $vr1, $sp, 96 + pcalau12i $a0, %pc_hi20(.LCPI2_2) + vld $vr0, $a0, %pc_lo12(.LCPI2_2) + vldi $vr1, -1790 + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 13 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 32 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.bu $vr0, $vr0, 21 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 37 move $a3, $fp @@ -556,12 +553,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_3) vld $vr0, $a0, %pc_lo12(.LCPI2_3) - vld $vr1, $sp, 64 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 48 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 20 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 42 move $a3, $fp @@ -571,11 +568,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_4) pcalau12i $a0, %pc_hi20(.LCPI2_5) vld $vr1, $a0, %pc_lo12(.LCPI2_5) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.bu $vr0, $vr1, 20 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 47 move $a3, $fp @@ -583,24 +580,24 @@ main: # @main jirl 
$ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_6) vld $vr0, $a0, %pc_lo12(.LCPI2_6) - vld $vr1, $sp, 64 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 48 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 27 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 52 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.bu $vr0, $vr0, 19 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 57 move $a3, $fp @@ -608,12 +605,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_7) vld $vr0, $a0, %pc_lo12(.LCPI2_7) - vld $vr1, $sp, 48 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 32 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 14 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 62 move $a3, $fp @@ -623,23 +620,23 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_8) pcalau12i $a0, %pc_hi20(.LCPI2_9) vld $vr1, $a0, %pc_lo12(.LCPI2_9) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.bu $vr0, $vr1, 28 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 67 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.bu $vr0, $vr0, 1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 72 move $a3, $fp @@ -647,12 +644,12 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_10) vld $vr0, $a0, %pc_lo12(.LCPI2_10) - vld $vr1, $sp, 64 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 48 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 77 move $a3, $fp @@ -660,25 +657,25 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_11) vld $vr0, $a0, %pc_lo12(.LCPI2_11) - vld $vr1, $sp, 48 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 32 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.bu $vr0, $vr0, 29 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 82 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.hu $vr0, $vr0, 29 vst $vr0, $sp, 16 # 16-byte Folded Spill - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 87 move $a3, $fp @@ -688,41 +685,37 @@ main: # @main vld $vr0, $a0, 
%pc_lo12(.LCPI2_12) pcalau12i $a0, %pc_hi20(.LCPI2_13) vld $vr1, $a0, %pc_lo12(.LCPI2_13) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.hu $vr0, $vr1, 13 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 92 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - lu32i.d $a0, 65535 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 32 # 16-byte Folded Spill - vst $vr0, $sp, 96 - ori $s0, $zero, 0 + vldi $vr0, -1729 + vst $vr0, $sp, 80 ori $a0, $zero, 0 lu32i.d $a0, 16 lu52i.d $a0, $a0, 1039 vreplgr2vr.d $vr0, $a0 vslei.hu $vr0, $vr0, 21 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 97 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 16 # 16-byte Folded Reload - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 102 move $a3, $fp @@ -732,11 +725,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_14) lu12i.w $a0, -16 vreplgr2vr.w $vr1, $a0 - vst $vr1, $sp, 96 + vst $vr1, $sp, 80 vslei.hu $vr0, $vr0, 14 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 107 move $a3, $fp @@ -746,11 +739,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_15) pcalau12i $a0, %pc_hi20(.LCPI2_16) vld $vr1, $a0, %pc_lo12(.LCPI2_16) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.hu $vr0, $vr1, 9 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 112 move $a3, $fp @@ -760,36 +753,36 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_17) pcalau12i $a0, %pc_hi20(.LCPI2_18) vld $vr1, $a0, %pc_lo12(.LCPI2_18) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.hu $vr0, $vr1, 9 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 117 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 32 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vldi $vr0, -1729 + vst $vr0, $sp, 80 lu52i.d $a0, $zero, 2047 vreplgr2vr.d $vr0, $a0 vslei.hu $vr0, $vr0, 22 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 122 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.hu $vr0, $vr0, 10 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 127 move $a3, $fp @@ -799,11 +792,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_19) pcalau12i $a0, %pc_hi20(.LCPI2_20) vld $vr1, $a0, %pc_lo12(.LCPI2_20) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.wu $vr0, $vr1, 12 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d 
$a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 132 move $a3, $fp @@ -813,11 +806,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_21) pcalau12i $a0, %pc_hi20(.LCPI2_22) vld $vr1, $a0, %pc_lo12(.LCPI2_22) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.wu $vr0, $vr1, 5 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 137 move $a3, $fp @@ -827,11 +820,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_23) pcalau12i $a0, %pc_hi20(.LCPI2_24) vld $vr1, $a0, %pc_lo12(.LCPI2_24) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.wu $vr0, $vr1, 0 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 142 move $a3, $fp @@ -841,11 +834,11 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_25) pcalau12i $a0, %pc_hi20(.LCPI2_26) vld $vr1, $a0, %pc_lo12(.LCPI2_26) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.wu $vr0, $vr1, 31 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 147 move $a3, $fp @@ -855,23 +848,23 @@ main: # @main vld $vr0, $a0, %pc_lo12(.LCPI2_27) pcalau12i $a0, %pc_hi20(.LCPI2_28) vld $vr1, $a0, %pc_lo12(.LCPI2_28) - vst $vr0, $sp, 96 + vst $vr0, $sp, 80 vslei.wu $vr0, $vr1, 14 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 152 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.wu $vr0, $vr0, 15 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 157 move $a3, $fp @@ -879,85 +872,84 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_29) vld $vr0, $a0, %pc_lo12(.LCPI2_29) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.wu $vr0, $vr0, 17 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 162 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.wu $vr0, $vr0, 23 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 167 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload vslei.wu $vr0, $vr0, 13 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 172 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - 
vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.du $vr0, $vr0, 13 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 177 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.du $vr0, $vr0, 9 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 182 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vld $vr0, $sp, 48 # 16-byte Folded Reload vslei.du $vr0, $vr0, 16 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 187 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 - lu32i.d $s0, 255 - vreplgr2vr.d $vr0, $s0 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 + vldi $vr0, -1776 vslei.du $vr0, $vr0, 30 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 192 move $a3, $fp @@ -965,36 +957,36 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_30) vld $vr0, $a0, %pc_lo12(.LCPI2_30) - vld $vr1, $sp, 48 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 32 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.du $vr0, $vr0, 10 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 197 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 96 + vld $vr0, $sp, 64 # 16-byte Folded Reload + vst $vr0, $sp, 80 vrepli.w $vr0, 1 vslei.du $vr0, $vr0, 5 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 202 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 + vld $vr0, $sp, 48 # 16-byte Folded Reload + vst $vr0, $sp, 80 vld $vr0, $sp, 64 # 16-byte Folded Reload - vst $vr0, $sp, 96 - vld $vr0, $sp, 80 # 16-byte Folded Reload vslei.du $vr0, $vr0, 1 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 207 move $a3, $fp @@ -1002,22 +994,21 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.LCPI2_31) vld $vr0, $a0, %pc_lo12(.LCPI2_31) - vld $vr1, $sp, 80 # 16-byte Folded Reload - vst $vr1, $sp, 96 + vld $vr1, $sp, 64 # 16-byte Folded Reload + vst $vr1, $sp, 80 vslei.du $vr0, $vr0, 19 - vst $vr0, $sp, 112 - addi.d $a0, $sp, 96 - addi.d $a1, $sp, 112 + vst $vr0, $sp, 96 + addi.d $a0, $sp, 80 + addi.d $a1, $sp, 96 ori $a2, $zero, 16 ori $a4, $zero, 212 move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d 
$sp, $sp, 160 + ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $ra, $sp, 120 # 8-byte Folded Reload + addi.d $sp, $sp, 128 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslli.dir/lsx-vslli.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslli.dir/lsx-vslli.s index 09b826a6..befeabd3 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslli.dir/lsx-vslli.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslli.dir/lsx-vslli.s @@ -563,8 +563,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, -524288 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -3200 vst $vr0, $sp, 32 vst $vr0, $sp, 48 addi.d $a0, $sp, 32 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-1.dir/lsx-vsllwil-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-1.dir/lsx-vsllwil-1.s index 2847e506..62d46e49 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-1.dir/lsx-vsllwil-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-1.dir/lsx-vsllwil-1.s @@ -783,9 +783,7 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $sp, 48 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1777 vsllwil.w.h $vr0, $vr0, 0 vst $vr0, $sp, 64 addi.d $a0, $sp, 48 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-2.dir/lsx-vsllwil-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-2.dir/lsx-vsllwil-2.s index 0d93f0d9..f243c76d 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-2.dir/lsx-vsllwil-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsllwil-2.dir/lsx-vsllwil-2.s @@ -708,11 +708,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -256 - pcalau12i $a1, %pc_hi20(.LCPI2_24) - vld $vr0, $a1, %pc_lo12(.LCPI2_24) - lu32i.d $a0, 255 - vreplgr2vr.d $vr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_24) + vld $vr0, $a0, %pc_lo12(.LCPI2_24) + vldi $vr1, -1762 vst $vr1, $sp, 48 vsllwil.du.wu $vr0, $vr0, 8 vst $vr0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-1.dir/lsx-vslt-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-1.dir/lsx-vslt-1.s index 1c8f2d37..14ad7dc9 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-1.dir/lsx-vslt-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-1.dir/lsx-vslt-1.s @@ -696,10 +696,9 @@ check_lsx_fp_out: # @check_lsx_fp_out .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -144 - st.d $ra, $sp, 136 # 8-byte Folded Spill - st.d $fp, $sp, 128 # 8-byte Folded Spill - st.d $s0, $sp, 120 # 8-byte Folded Spill + addi.d $sp, $sp, -128 + st.d $ra, $sp, 120 # 8-byte Folded Spill + st.d $fp, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI2_0) vld $vr0, $a0, %pc_lo12(.LCPI2_0) pcalau12i $a0, %pc_hi20(.LCPI2_1) @@ -845,10 +844,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $s0, $zero, 0 - ori $a0, $zero, 0 - lu32i.d $a0, 255 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1776 vst $vr0, $sp, 80 vrepli.w $vr0, 4 vrepli.d $vr1, 4 @@ -908,9 +904,7 
@@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1777 vst $vr0, $sp, 80 pcalau12i $a0, %pc_hi20(.LCPI2_23) vld $vr0, $a0, %pc_lo12(.LCPI2_23) @@ -943,10 +937,9 @@ main: # @main jirl $ra, $ra, 0 vld $vr0, $sp, 64 # 16-byte Folded Reload vst $vr0, $sp, 80 - lu12i.w $a0, 64 - vreplgr2vr.w $vr0, $a0 - vrepli.h $vr1, -10 - vslt.h $vr0, $vr0, $vr1 + vrepli.h $vr0, -10 + vldi $vr1, -3580 + vslt.h $vr0, $vr1, $vr0 vst $vr0, $sp, 96 addi.d $a0, $sp, 80 addi.d $a1, $sp, 96 @@ -1049,11 +1042,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - pcalau12i $a1, %pc_hi20(.LCPI2_31) - vld $vr0, $a1, %pc_lo12(.LCPI2_31) - ori $a0, $a0, 4095 - vreplgr2vr.w $vr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_31) + vld $vr0, $a0, %pc_lo12(.LCPI2_31) + vldi $vr1, -2305 vst $vr1, $sp, 80 vld $vr1, $sp, 64 # 16-byte Folded Reload vslt.h $vr0, $vr0, $vr1 @@ -1097,8 +1088,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu32i.d $s0, -65536 - vreplgr2vr.d $vr0, $s0 + vldi $vr0, -1600 vst $vr0, $sp, 80 lu52i.d $a0, $zero, -1 vreplgr2vr.d $vr0, $a0 @@ -1354,10 +1344,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $fp, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 136 # 8-byte Folded Reload - addi.d $sp, $sp, 144 + ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $ra, $sp, 120 # 8-byte Folded Reload + addi.d $sp, $sp, 128 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-2.dir/lsx-vslt-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-2.dir/lsx-vslt-2.s index a000493c..272b4db2 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-2.dir/lsx-vslt-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslt-2.dir/lsx-vslt-2.s @@ -681,16 +681,14 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $a0, $zero, 0 - ori $a1, $zero, 0 - lu32i.d $a1, -1 - vreplgr2vr.d $vr0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI2_20) - vld $vr1, $a1, %pc_lo12(.LCPI2_20) + vldi $vr0, -1552 vst $vr0, $sp, 64 + pcalau12i $a0, %pc_hi20(.LCPI2_20) + vld $vr0, $a0, %pc_lo12(.LCPI2_20) + ori $a0, $zero, 0 lu32i.d $a0, 32 - vreplgr2vr.d $vr0, $a0 - vslt.wu $vr0, $vr1, $vr0 + vreplgr2vr.d $vr1, $a0 + vslt.wu $vr0, $vr0, $vr1 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 addi.d $a1, $sp, 80 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-1.dir/lsx-vslti-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-1.dir/lsx-vslti-1.s index 683b0044..0d9b22eb 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-1.dir/lsx-vslti-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-1.dir/lsx-vslti-1.s @@ -400,9 +400,7 @@ main: # @main st.d $ra, $sp, 120 # 8-byte Folded Spill st.d $fp, $sp, 112 # 8-byte Folded Spill st.d $s0, $sp, 104 # 8-byte Folded Spill - addi.w $a0, $zero, -1 - lu32i.d $a0, 0 - vreplgr2vr.d $vr0, $a0 + vldi $vr0, -1777 vst $vr0, $sp, 64 addi.w $a0, $zero, -2 lu32i.d $a0, 1 @@ -418,12 +416,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - ori $s0, $zero, 0 - ori $a0, $zero, 0 - lu32i.d $a0, -256 
- lu52i.d $a0, $a0, 15
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1696
 vst $vr0, $sp, 64
+ ori $s0, $zero, 0
 ori $a0, $zero, 0
 lu32i.d $a0, -65792
 lu52i.d $a0, $a0, 15
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-2.dir/lsx-vslti-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-2.dir/lsx-vslti-2.s
index 70592890..a3a6717e 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-2.dir/lsx-vslti-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vslti-2.dir/lsx-vslti-2.s
@@ -408,7 +408,6 @@ main: # @main
 st.d $ra, $sp, 136 # 8-byte Folded Spill
 st.d $fp, $sp, 128 # 8-byte Folded Spill
 st.d $s0, $sp, 120 # 8-byte Folded Spill
- st.d $s1, $sp, 112 # 8-byte Folded Spill
 vrepli.b $vr0, -1
 vst $vr0, $sp, 48 # 16-byte Folded Spill
 vst $vr0, $sp, 80
@@ -425,10 +424,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $s1, $zero, -1
- lu32i.d $s1, 255
- lu52i.d $a0, $s1, -16
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1633
 vst $vr0, $sp, 80
 ori $s0, $zero, 0
 ori $a0, $zero, 0
@@ -470,11 +466,10 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vreplgr2vr.d $vr0, $s1
+ vldi $vr0, -1761
 vst $vr0, $sp, 80
- ori $a0, $zero, 0
- lu32i.d $a0, -8192
- lu52i.d $a0, $a0, 881
+ lu32i.d $s0, -8192
+ lu52i.d $a0, $s0, 881
 vreplgr2vr.d $vr0, $a0
 vslti.bu $vr0, $vr0, 8
 vst $vr0, $sp, 96
@@ -615,9 +610,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_10)
 vld $vr0, $a0, %pc_lo12(.LCPI2_10)
- lu32i.d $s0, -1
- vreplgr2vr.d $vr1, $s0
- vst $vr1, $sp, 16 # 16-byte Folded Spill
+ vldi $vr1, -1552
 vst $vr1, $sp, 80
 vslti.hu $vr0, $vr0, 6
 vst $vr0, $sp, 96
@@ -682,7 +675,7 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_14)
 vld $vr0, $a0, %pc_lo12(.LCPI2_14)
- vld $vr1, $sp, 16 # 16-byte Folded Reload
+ vldi $vr1, -1552
 vst $vr1, $sp, 80
 vslti.wu $vr0, $vr0, 22
 vst $vr0, $sp, 96
@@ -926,7 +919,6 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s1, $sp, 112 # 8-byte Folded Reload
 ld.d $s0, $sp, 120 # 8-byte Folded Reload
 ld.d $fp, $sp, 128 # 8-byte Folded Reload
 ld.d $ra, $sp, 136 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsra.dir/lsx-vsra.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsra.dir/lsx-vsra.s
index d2bfae86..22007758 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsra.dir/lsx-vsra.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsra.dir/lsx-vsra.s
@@ -359,9 +359,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1552
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
@@ -466,8 +464,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 261120
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -1416
 vst $vr0, $sp, 48
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrai.dir/lsx-vsrai.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrai.dir/lsx-vsrai.s
index 67a7b591..a0b7bfdf 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrai.dir/lsx-vsrai.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrai.dir/lsx-vsrai.s
@@ -279,10 +279,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
main: # @main
# %bb.0:
- addi.d $sp, $sp, -96
- st.d $ra, $sp, 88 # 8-byte Folded Spill
- st.d $fp, $sp, 80 # 8-byte Folded Spill
- st.d $s0, $sp, 72 # 8-byte Folded Spill
+ addi.d $sp, $sp, -80
+ st.d $ra, $sp, 72 # 8-byte Folded Spill
+ st.d $fp, $sp, 64 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 vst $vr0, $sp, 32
@@ -339,7 +338,6 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $s0, $zero, 0
 ori $a0, $zero, 0
 lu32i.d $a0, -65536
 lu52i.d $a0, $a0, 31
@@ -449,8 +447,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu32i.d $s0, -1
- vreplgr2vr.d $vr0, $s0
+ vldi $vr0, -1552
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
@@ -647,10 +644,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 72 # 8-byte Folded Reload
- ld.d $fp, $sp, 80 # 8-byte Folded Reload
- ld.d $ra, $sp, 88 # 8-byte Folded Reload
- addi.d $sp, $sp, 96
+ ld.d $fp, $sp, 64 # 8-byte Folded Reload
+ ld.d $ra, $sp, 72 # 8-byte Folded Reload
+ addi.d $sp, $sp, 80
 ret
.Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrani.dir/lsx-vsrani.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrani.dir/lsx-vsrani.s
index 73cd7a7b..ea81808e 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrani.dir/lsx-vsrani.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrani.dir/lsx-vsrani.s
@@ -771,8 +771,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_28)
 vld $vr0, $a0, %pc_lo12(.LCPI2_28)
 vst $vr0, $sp, 48
- ori $a0, $zero, 2048
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2808
 vld $vr1, $sp, 32 # 16-byte Folded Reload
 vsrani.w.d $vr1, $vr0, 21
 vst $vr1, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrari.dir/lsx-vsrari.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrari.dir/lsx-vsrari.s
index afa09bd2..d2f7bd7d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrari.dir/lsx-vsrari.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrari.dir/lsx-vsrari.s
@@ -750,9 +750,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 32 # 16-byte Folded Reload
 vst $vr0, $sp, 48
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1552
 vsrari.w $vr0, $vr0, 29
 vst $vr0, $sp, 64
 addi.d $a0, $sp, 48
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrl.dir/lsx-vsrl.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrl.dir/lsx-vsrl.s
index 5df4fff4..1c3ed09f 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrl.dir/lsx-vsrl.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrl.dir/lsx-vsrl.s
@@ -455,8 +455,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 1024
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2812
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
@@ -730,9 +729,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $a0, $zero, -1
- lu32i.d $a0, 65535
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1729
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrli.dir/lsx-vsrli.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrli.dir/lsx-vsrli.s
index e06805ee..5d712e1d 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrli.dir/lsx-vsrli.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrli.dir/lsx-vsrli.s
@@ -546,9 +546,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 31
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -2303
 vst $vr0, $sp, 32
 vst $vr0, $sp, 48
 addi.d $a0, $sp, 32
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrln.dir/lsx-vsrln.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrln.dir/lsx-vsrln.s
index 84120290..562e5d89 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrln.dir/lsx-vsrln.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrln.dir/lsx-vsrln.s
@@ -525,8 +525,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vst $vr1, $sp, 80
- lu12i.w $a0, -524288
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3200
 vsrln.h.w $vr0, $vr1, $vr0
 vst $vr0, $sp, 96
 addi.d $a0, $sp, 80
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlni.dir/lsx-vsrlni.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlni.dir/lsx-vsrlni.s
index 100b574e..9bd3afba 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlni.dir/lsx-vsrlni.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlni.dir/lsx-vsrlni.s
@@ -589,9 +589,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_13)
 vld $vr0, $a0, %pc_lo12(.LCPI2_13)
 vst $vr0, $sp, 48
- addi.w $a0, $zero, -1
- lu32i.d $a0, 65535
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1729
 vld $vr1, $sp, 32 # 16-byte Folded Reload
 vsrlni.h.w $vr1, $vr0, 18
 vst $vr1, $sp, 64
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlr.dir/lsx-vsrlr.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlr.dir/lsx-vsrlr.s
index e098ce68..20d57636 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlr.dir/lsx-vsrlr.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlr.dir/lsx-vsrlr.s
@@ -1084,8 +1084,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 1024
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3520
 vst $vr0, $sp, 80
 vrepli.h $vr0, 10
 vld $vr1, $sp, 48 # 16-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlri.dir/lsx-vsrlri.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlri.dir/lsx-vsrlri.s
index 17750c5f..2025d6c7 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlri.dir/lsx-vsrlri.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlri.dir/lsx-vsrlri.s
@@ -477,8 +477,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $a0, $zero, 512
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2814
 vst $vr0, $sp, 64
 vrepli.b $vr0, -1
 vst $vr0, $sp, 32 # 16-byte Folded Spill
@@ -584,8 +583,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu12i.w $a0, 8
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2688
 vst $vr0, $sp, 64
 vld $vr0, $sp, 32 # 16-byte Folded Reload
 vsrlri.h $vr0, $vr0, 1
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlrni.dir/lsx-vsrlrni.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlrni.dir/lsx-vsrlrni.s
index 0e9932c9..909c64e0 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlrni.dir/lsx-vsrlrni.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsrlrni.dir/lsx-vsrlrni.s
@@ -1822,8 +1822,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_96)
 vld $vr1, $a0, %pc_lo12(.LCPI2_96)
 vst $vr0, $sp, 96
- lu12i.w $a0, 2
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3808
 vsrlrni.d.q $vr1, $vr0, 37
 vst $vr1, $sp, 112
 addi.d $a0, $sp, 96
@@ -2014,14 +2013,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_114)
 vld $vr0, $a0, %pc_lo12(.LCPI2_114)
- vst $vr0, $sp, 96
 pcalau12i $a0, %pc_hi20(.LCPI2_115)
- vld $vr0, $a0, %pc_lo12(.LCPI2_115)
- addi.w $a0, $zero, -1
- lu32i.d $a0, -256
- vreplgr2vr.d $vr1, $a0
- vsrlrni.d.q $vr0, $vr1, 116
- vst $vr0, $sp, 112
+ vld $vr1, $a0, %pc_lo12(.LCPI2_115)
+ vst $vr0, $sp, 96
+ vldi $vr0, -1553
+ vsrlrni.d.q $vr1, $vr0, 116
+ vst $vr1, $sp, 112
 addi.d $a0, $sp, 96
 addi.d $a1, $sp, 112
 ori $a2, $zero, 16
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrani.dir/lsx-vssrani.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrani.dir/lsx-vssrani.s
index a3cb7bdf..f7ddbd4f 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrani.dir/lsx-vssrani.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrani.dir/lsx-vssrani.s
@@ -994,10 +994,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
main: # @main
# %bb.0:
- addi.d $sp, $sp, -144
- st.d $ra, $sp, 136 # 8-byte Folded Spill
- st.d $fp, $sp, 128 # 8-byte Folded Spill
- st.d $s0, $sp, 120 # 8-byte Folded Spill
+ addi.d $sp, $sp, -128
+ st.d $ra, $sp, 120 # 8-byte Folded Spill
+ st.d $fp, $sp, 112 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
@@ -1595,8 +1594,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_64)
 vld $vr0, $a0, %pc_lo12(.LCPI2_64)
 vst $vr0, $sp, 80
- lu12i.w $a0, -524288
- vreplgr2vr.w $vr0, $a0
+ vldi $vr0, -3200
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vssrani.d.q $vr0, $vr1, 96
 vst $vr0, $sp, 96
@@ -1626,7 +1624,6 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_68)
 vld $vr0, $a0, %pc_lo12(.LCPI2_68)
 vst $vr0, $sp, 80
- ori $s0, $zero, 0
 ori $a0, $zero, 0
 lu32i.d $a0, 2048
 vreplgr2vr.d $vr0, $a0
@@ -1686,13 +1683,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 64 # 16-byte Folded Reload
- vst $vr0, $sp, 80
 pcalau12i $a0, %pc_hi20(.LCPI2_76)
 vld $vr0, $a0, %pc_lo12(.LCPI2_76)
- ori $a0, $zero, 0
- lu32i.d $a0, -65536
- vreplgr2vr.d $vr1, $a0
+ vld $vr1, $sp, 64 # 16-byte Folded Reload
+ vst $vr1, $sp, 80
+ vldi $vr1, -1600
 vssrani.bu.h $vr1, $vr0, 10
 vst $vr1, $sp, 96
 addi.d $a0, $sp, 80
@@ -1706,8 +1701,7 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_77)
 vld $vr1, $sp, 64 # 16-byte Folded Reload
 vst $vr1, $sp, 80
- lu32i.d $s0, -1
- vreplgr2vr.d $vr1, $s0
+ vldi $vr1, -1552
 vssrani.hu.w $vr1, $vr0, 18
 vst $vr1, $sp, 96
 addi.d $a0, $sp, 80
@@ -2049,10 +2043,9 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s0, $sp, 120 # 8-byte Folded Reload
- ld.d $fp, $sp, 128 # 8-byte Folded Reload
- ld.d $ra, $sp, 136 # 8-byte Folded Reload
- addi.d $sp, $sp, 144
+ ld.d $fp, $sp, 112 # 8-byte Folded Reload
+ ld.d $ra, $sp, 120 # 8-byte Folded Reload
+ addi.d $sp, $sp, 128
 ret
.Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarn.dir/lsx-vssrarn.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarn.dir/lsx-vssrarn.s
index 44730260..b7b8f761 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarn.dir/lsx-vssrarn.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarn.dir/lsx-vssrarn.s
@@ -741,13 +741,11 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
 vld $vr0, $a0, %pc_lo12(.LCPI2_5)
- vst $vr0, $sp, 112
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
- vld $vr0, $a0, %pc_lo12(.LCPI2_6)
- addi.w $a0, $zero, -1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr1, $a0
- vssrarn.b.h $vr0, $vr1, $vr0
+ vld $vr1, $a0, %pc_lo12(.LCPI2_6)
+ vst $vr0, $sp, 112
+ vldi $vr0, -1777
+ vssrarn.b.h $vr0, $vr0, $vr1
 vst $vr0, $sp, 128
 addi.d $a0, $sp, 112
 addi.d $a1, $sp, 128
@@ -1282,12 +1280,11 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_49)
 vld $vr0, $a0, %pc_lo12(.LCPI2_49)
 vst $vr0, $sp, 112
- lu12i.w $a0, 260096
- vreplgr2vr.w $vr0, $a0
 lu12i.w $a0, 262143
 ori $a0, $a0, 3840
- vreplgr2vr.d $vr1, $a0
- vssrarn.bu.h $vr0, $vr1, $vr0
+ vreplgr2vr.d $vr0, $a0
+ vldi $vr1, -1424
+ vssrarn.bu.h $vr0, $vr0, $vr1
 vst $vr0, $sp, 128
 addi.d $a0, $sp, 112
 addi.d $a1, $sp, 128
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarni.dir/lsx-vssrarni.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarni.dir/lsx-vssrarni.s
index 4cec2113..7c076475 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarni.dir/lsx-vssrarni.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrarni.dir/lsx-vssrarni.s
@@ -1105,22 +1105,21 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
main: # @main
# %bb.0:
- addi.d $sp, $sp, -192
- st.d $ra, $sp, 184 # 8-byte Folded Spill
- st.d $fp, $sp, 176 # 8-byte Folded Spill
- st.d $s0, $sp, 168 # 8-byte Folded Spill
- st.d $s1, $sp, 160 # 8-byte Folded Spill
+ addi.d $sp, $sp, -176
+ st.d $ra, $sp, 168 # 8-byte Folded Spill
+ st.d $fp, $sp, 160 # 8-byte Folded Spill
+ st.d $s0, $sp, 152 # 8-byte Folded Spill
 vrepli.b $vr1, 0
- vst $vr1, $sp, 112 # 16-byte Folded Spill
+ vst $vr1, $sp, 96 # 16-byte Folded Spill
 vrepli.b $vr0, -1
- vst $vr0, $sp, 96 # 16-byte Folded Spill
- vst $vr1, $sp, 128
+ vst $vr0, $sp, 80 # 16-byte Folded Spill
+ vst $vr1, $sp, 112
 vssrarni.b.h $vr0, $vr0, 15
- vst $vr0, $sp, 144
+ vst $vr0, $sp, 128
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 23
 move $a3, $fp
@@ -1130,13 +1129,13 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
 vld $vr1, $a0, %pc_lo12(.LCPI2_1)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 lu12i.w $a0, 522240
 vreplgr2vr.w $vr0, $a0
 vssrarni.b.h $vr0, $vr1, 13
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 29
 move $a3, $fp
@@ -1144,13 +1143,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_2)
 vld $vr0, $a0, %pc_lo12(.LCPI2_2)
- vst $vr0, $sp, 80 # 16-byte Folded Spill
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vst $vr0, $sp, 64 # 16-byte Folded Spill
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.b.h $vr0, $vr1, 5
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 35
 move $a3, $fp
@@ -1160,12 +1159,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_3)
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 vld $vr1, $a0, %pc_lo12(.LCPI2_4)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 80 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 64 # 16-byte Folded Reload
 vssrarni.b.h $vr1, $vr0, 7
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 41
 move $a3, $fp
@@ -1175,12 +1174,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_5)
 pcalau12i $a0, %pc_hi20(.LCPI2_6)
 vld $vr1, $a0, %pc_lo12(.LCPI2_6)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.b.h $vr1, $vr0, 9
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 47
 move $a3, $fp
@@ -1188,25 +1187,25 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_7)
 vld $vr0, $a0, %pc_lo12(.LCPI2_7)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
 vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.b.h $vr0, $vr1, 11
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 53
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.b.h $vr0, $vr1, 7
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 59
 move $a3, $fp
@@ -1218,11 +1217,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_9)
 pcalau12i $a0, %pc_hi20(.LCPI2_10)
 vld $vr2, $a0, %pc_lo12(.LCPI2_10)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.b.h $vr2, $vr1, 2
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 65
 move $a3, $fp
@@ -1230,12 +1229,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_11)
 vld $vr0, $a0, %pc_lo12(.LCPI2_11)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.h.w $vr0, $vr1, 30
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 71
 move $a3, $fp
@@ -1243,13 +1242,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_12)
 vld $vr0, $a0, %pc_lo12(.LCPI2_12)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vrepli.d $vr0, 1
- vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
 vssrarni.h.w $vr0, $vr1, 1
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 77
 move $a3, $fp
@@ -1257,28 +1256,28 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_13)
 vld $vr0, $a0, %pc_lo12(.LCPI2_13)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.h.w $vr0, $vr0, 17
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 83
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 pcalau12i $a0, %pc_hi20(.LCPI2_14)
 vld $vr0, $a0, %pc_lo12(.LCPI2_14)
 lu12i.w $a0, -32
 ori $a0, $a0, 1
 vreplgr2vr.w $vr1, $a0
 vssrarni.h.w $vr1, $vr0, 21
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 89
 move $a3, $fp
@@ -1288,12 +1287,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_15)
 pcalau12i $a0, %pc_hi20(.LCPI2_16)
 vld $vr1, $a0, %pc_lo12(.LCPI2_16)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.h.w $vr0, $vr1, 25
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 95
 move $a3, $fp
@@ -1305,22 +1304,22 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_18)
 pcalau12i $a0, %pc_hi20(.LCPI2_19)
 vld $vr2, $a0, %pc_lo12(.LCPI2_19)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.h.w $vr2, $vr1, 17
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 101
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.h.w $vr0, $vr0, 28
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 107
 move $a3, $fp
@@ -1328,12 +1327,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_20)
 vld $vr0, $a0, %pc_lo12(.LCPI2_20)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.h.w $vr0, $vr1, 28
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 113
 move $a3, $fp
@@ -1343,47 +1342,47 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_21)
 pcalau12i $a0, %pc_hi20(.LCPI2_22)
 vld $vr1, $a0, %pc_lo12(.LCPI2_22)
- vld $vr2, $sp, 112 # 16-byte Folded Reload
- vst $vr2, $sp, 128
+ vld $vr2, $sp, 96 # 16-byte Folded Reload
+ vst $vr2, $sp, 112
 vssrarni.h.w $vr1, $vr0, 30
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 119
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.h.w $vr0, $vr0, 30
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 125
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vrepli.w $vr0, 128
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.h.w $vr1, $vr0, 22
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 131
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.h.w $vr0, $vr0, 19
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 137
 move $a3, $fp
@@ -1395,11 +1394,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_24)
 pcalau12i $a0, %pc_hi20(.LCPI2_25)
 vld $vr2, $a0, %pc_lo12(.LCPI2_25)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.w.d $vr2, $vr1, 7
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 143
 move $a3, $fp
@@ -1409,35 +1408,35 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_26)
 pcalau12i $a0, %pc_hi20(.LCPI2_27)
 vld $vr1, $a0, %pc_lo12(.LCPI2_27)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.w.d $vr1, $vr0, 10
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 149
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
 vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 80 # 16-byte Folded Reload
 vssrarni.w.d $vr0, $vr0, 12
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 155
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.w.d $vr0, $vr0, 50
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 161
 move $a3, $fp
@@ -1447,12 +1446,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_28)
 pcalau12i $a0, %pc_hi20(.LCPI2_29)
 vld $vr1, $a0, %pc_lo12(.LCPI2_29)
- vld $vr2, $sp, 112 # 16-byte Folded Reload
- vst $vr2, $sp, 128
+ vld $vr2, $sp, 96 # 16-byte Folded Reload
+ vst $vr2, $sp, 112
 vssrarni.w.d $vr1, $vr0, 46
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 167
 move $a3, $fp
@@ -1462,12 +1461,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_30)
 pcalau12i $a0, %pc_hi20(.LCPI2_31)
 vld $vr1, $a0, %pc_lo12(.LCPI2_31)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.w.d $vr0, $vr1, 17
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 173
 move $a3, $fp
@@ -1475,13 +1474,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_32)
 vld $vr2, $a0, %pc_lo12(.LCPI2_32)
- vst $vr2, $sp, 80 # 16-byte Folded Spill
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vst $vr2, $sp, 64 # 16-byte Folded Spill
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.w.d $vr0, $vr2, 41
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 179
 move $a3, $fp
@@ -1491,13 +1490,13 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_33)
 pcalau12i $a0, %pc_hi20(.LCPI2_34)
 vld $vr1, $a0, %pc_lo12(.LCPI2_34)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 lu12i.w $a0, 2
 vreplgr2vr.d $vr0, $a0
 vssrarni.w.d $vr1, $vr0, 49
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 185
 move $a3, $fp
@@ -1505,38 +1504,38 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_35)
 vld $vr1, $a0, %pc_lo12(.LCPI2_35)
- vst $vr1, $sp, 64 # 16-byte Folded Spill
+ vst $vr1, $sp, 48 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_36)
 vld $vr0, $a0, %pc_lo12(.LCPI2_36)
- vst $vr1, $sp, 128
- vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
 vssrarni.w.d $vr1, $vr0, 50
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 191
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
 vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 80 # 16-byte Folded Reload
 vssrarni.w.d $vr0, $vr0, 45
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 197
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.w.d $vr0, $vr0, 43
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 203
 move $a3, $fp
@@ -1546,12 +1545,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_37)
 pcalau12i $a0, %pc_hi20(.LCPI2_38)
 vld $vr1, $a0, %pc_lo12(.LCPI2_38)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.w.d $vr0, $vr1, 20
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 209
 move $a3, $fp
@@ -1561,12 +1560,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_39)
 pcalau12i $a0, %pc_hi20(.LCPI2_40)
 vld $vr1, $a0, %pc_lo12(.LCPI2_40)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.w.d $vr0, $vr1, 62
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 215
 move $a3, $fp
@@ -1574,28 +1573,27 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_41)
 vld $vr0, $a0, %pc_lo12(.LCPI2_41)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 pcalau12i $a0, %pc_hi20(.LCPI2_42)
 vld $vr0, $a0, %pc_lo12(.LCPI2_42)
- ori $s0, $zero, 0
 ori $a0, $zero, 0
 lu32i.d $a0, 65536
 vreplgr2vr.d $vr1, $a0
 vssrarni.w.d $vr0, $vr1, 3
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 221
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.d.q $vr0, $vr0, 121
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 227
 move $a3, $fp
@@ -1603,25 +1601,25 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_43)
 vld $vr0, $a0, %pc_lo12(.LCPI2_43)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vrepli.h $vr0, 511
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.d.q $vr1, $vr0, 27
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 233
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.d.q $vr0, $vr1, 89
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 239
 move $a3, $fp
@@ -1631,12 +1629,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_44)
 pcalau12i $a0, %pc_hi20(.LCPI2_45)
 vld $vr1, $a0, %pc_lo12(.LCPI2_45)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.d.q $vr1, $vr0, 14
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 245
 move $a3, $fp
@@ -1648,49 +1646,49 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_47)
 pcalau12i $a0, %pc_hi20(.LCPI2_48)
 vld $vr2, $a0, %pc_lo12(.LCPI2_48)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.d.q $vr2, $vr1, 91
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 251
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.d.q $vr0, $vr1, 18
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 257
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.d.q $vr0, $vr0, 81
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 263
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- addi.w $s1, $zero, -1
- lu52i.d $a0, $s1, 2047
+ addi.w $s0, $zero, -1
+ lu52i.d $a0, $s0, 2047
 vreplgr2vr.d $vr0, $a0
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vrepli.h $vr0, 77
 vrepli.b $vr1, 82
 vssrarni.d.q $vr1, $vr0, 19
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 269
 move $a3, $fp
@@ -1700,12 +1698,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_49)
 pcalau12i $a0, %pc_hi20(.LCPI2_50)
 vld $vr1, $a0, %pc_lo12(.LCPI2_50)
- vld $vr2, $sp, 112 # 16-byte Folded Reload
- vst $vr2, $sp, 128
+ vld $vr2, $sp, 96 # 16-byte Folded Reload
+ vst $vr2, $sp, 112
 vssrarni.d.q $vr1, $vr0, 72
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 275
 move $a3, $fp
@@ -1717,11 +1715,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_52)
 pcalau12i $a0, %pc_hi20(.LCPI2_53)
 vld $vr2, $a0, %pc_lo12(.LCPI2_53)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.d.q $vr2, $vr1, 46
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 281
 move $a3, $fp
@@ -1731,52 +1729,48 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_54)
 pcalau12i $a0, %pc_hi20(.LCPI2_55)
 vld $vr1, $a0, %pc_lo12(.LCPI2_55)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vrepli.h $vr0, 32
 vssrarni.bu.h $vr0, $vr1, 10
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 287
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- move $a0, $s1
- lu32i.d $a0, 0
- vreplgr2vr.d $vr0, $a0
- vst $vr0, $sp, 16 # 16-byte Folded Spill
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vldi $vr0, -1777
 vssrarni.bu.h $vr0, $vr0, 11
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 293
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
- lu12i.w $a0, 32
- vreplgr2vr.w $vr0, $a0
- vld $vr1, $sp, 80 # 16-byte Folded Reload
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vldi $vr0, -3582
+ vld $vr1, $sp, 64 # 16-byte Folded Reload
 vssrarni.bu.h $vr1, $vr0, 15
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 299
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr0, $vr0, 4
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 305
 move $a3, $fp
@@ -1784,17 +1778,17 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_56)
 vld $vr2, $a0, %pc_lo12(.LCPI2_56)
- vst $vr2, $sp, 32 # 16-byte Folded Spill
+ vst $vr2, $sp, 16 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_57)
 pcalau12i $a1, %pc_hi20(.LCPI2_58)
 vld $vr1, $a1, %pc_lo12(.LCPI2_58)
- vst $vr1, $sp, 80 # 16-byte Folded Spill
+ vst $vr1, $sp, 64 # 16-byte Folded Spill
 vld $vr0, $a0, %pc_lo12(.LCPI2_57)
- vst $vr2, $sp, 128
+ vst $vr2, $sp, 112
 vssrarni.bu.h $vr1, $vr0, 4
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 311
 move $a3, $fp
@@ -1806,11 +1800,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_60)
 pcalau12i $a0, %pc_hi20(.LCPI2_61)
 vld $vr2, $a0, %pc_lo12(.LCPI2_61)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr2, $vr1, 1
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 317
 move $a3, $fp
@@ -1822,11 +1816,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_63)
 pcalau12i $a0, %pc_hi20(.LCPI2_64)
 vld $vr2, $a0, %pc_lo12(.LCPI2_64)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr2, $vr1, 7
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 323
 move $a3, $fp
@@ -1836,14 +1830,14 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_65)
 pcalau12i $a0, %pc_hi20(.LCPI2_66)
 vld $vr2, $a0, %pc_lo12(.LCPI2_66)
- vst $vr2, $sp, 48 # 16-byte Folded Spill
+ vst $vr2, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_67)
 vld $vr1, $a0, %pc_lo12(.LCPI2_67)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr1, $vr2, 3
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 329
 move $a3, $fp
@@ -1853,12 +1847,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_68)
 pcalau12i $a0, %pc_hi20(.LCPI2_69)
 vld $vr1, $a0, %pc_lo12(.LCPI2_69)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 48 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 32 # 16-byte Folded Reload
 vssrarni.bu.h $vr1, $vr0, 12
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 335
 move $a3, $fp
@@ -1866,27 +1860,27 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_70)
 vld $vr1, $a0, %pc_lo12(.LCPI2_70)
- vst $vr1, $sp, 48 # 16-byte Folded Spill
+ vst $vr1, $sp, 32 # 16-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_71)
 vld $vr0, $a0, %pc_lo12(.LCPI2_71)
- vst $vr1, $sp, 128
- vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
 vssrarni.bu.h $vr0, $vr1, 3
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 341
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
- vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr1, $sp, 80 # 16-byte Folded Reload
 vssrarni.bu.h $vr0, $vr1, 14
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 347
 move $a3, $fp
@@ -1894,20 +1888,20 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_72)
 vld $vr0, $a0, %pc_lo12(.LCPI2_72)
- vld $vr1, $sp, 64 # 16-byte Folded Reload
- vst $vr1, $sp, 128
- vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vld $vr1, $sp, 48 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
 vssrarni.bu.h $vr1, $vr0, 2
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 353
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 pcalau12i $a0, %pc_hi20(.LCPI2_73)
 vld $vr0, $a0, %pc_lo12(.LCPI2_73)
 lu12i.w $a0, -234388
@@ -1915,9 +1909,9 @@ main: # @main
 lu32i.d $a0, 0
 vreplgr2vr.d $vr1, $a0
 vssrarni.bu.h $vr1, $vr0, 11
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 359
 move $a3, $fp
@@ -1925,35 +1919,35 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_74)
 vld $vr0, $a0, %pc_lo12(.LCPI2_74)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.bu.h $vr0, $vr0, 12
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 365
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr0, $vr0, 2
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 371
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
- vld $vr0, $sp, 16 # 16-byte Folded Reload
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
+ vldi $vr0, -1777
 vssrarni.bu.h $vr0, $vr1, 14
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 377
 move $a3, $fp
@@ -1965,11 +1959,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_76)
 pcalau12i $a0, %pc_hi20(.LCPI2_77)
 vld $vr2, $a0, %pc_lo12(.LCPI2_77)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr2, $vr1, 14
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 383
 move $a3, $fp
@@ -1981,11 +1975,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_79)
 pcalau12i $a0, %pc_hi20(.LCPI2_80)
 vld $vr2, $a0, %pc_lo12(.LCPI2_80)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.bu.h $vr2, $vr1, 3
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 389
 move $a3, $fp
@@ -1993,15 +1987,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_81)
 vld $vr0, $a0, %pc_lo12(.LCPI2_81)
- vst $vr0, $sp, 128
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr0, $a0
- vld $vr1, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vldi $vr0, -2305
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
 vssrarni.hu.w $vr1, $vr0, 7
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 395
 move $a3, $fp
@@ -2011,12 +2003,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_82)
 pcalau12i $a0, %pc_hi20(.LCPI2_83)
 vld $vr1, $a0, %pc_lo12(.LCPI2_83)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.hu.w $vr0, $vr1, 1
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 401
 move $a3, $fp
@@ -2024,12 +2016,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_84)
 vld $vr0, $a0, %pc_lo12(.LCPI2_84)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.hu.w $vr0, $vr1, 15
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 407
 move $a3, $fp
@@ -2037,23 +2029,23 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_85)
 vld $vr0, $a0, %pc_lo12(.LCPI2_85)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.hu.w $vr1, $vr0, 13
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 413
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.hu.w $vr0, $vr0, 4
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 419
 move $a3, $fp
@@ -2063,23 +2055,23 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_86)
 pcalau12i $a0, %pc_hi20(.LCPI2_87)
 vld $vr1, $a0, %pc_lo12(.LCPI2_87)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 112 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
 vssrarni.hu.w $vr0, $vr1, 14
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 425
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.hu.w $vr0, $vr0, 31
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 431
 move $a3, $fp
@@ -2089,12 +2081,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_88)
 pcalau12i $a0, %pc_hi20(.LCPI2_89)
 vld $vr1, $a0, %pc_lo12(.LCPI2_89)
- vst $vr0, $sp, 128
- vld $vr0, $sp, 32 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
+ vld $vr0, $sp, 16 # 16-byte Folded Reload
 vssrarni.hu.w $vr1, $vr0, 3
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 437
 move $a3, $fp
@@ -2102,12 +2094,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_90)
 vld $vr0, $a0, %pc_lo12(.LCPI2_90)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.hu.w $vr1, $vr0, 14
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 443
 move $a3, $fp
@@ -2115,12 +2107,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_91)
 vld $vr0, $a0, %pc_lo12(.LCPI2_91)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.hu.w $vr0, $vr1, 17
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 449
 move $a3, $fp
@@ -2132,37 +2124,37 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_93)
 pcalau12i $a0, %pc_hi20(.LCPI2_94)
 vld $vr2, $a0, %pc_lo12(.LCPI2_94)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.hu.w $vr2, $vr1, 28
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 455
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- lu32i.d $s1, -458753
+ lu32i.d $s0, -458753
 pcalau12i $a0, %pc_hi20(.LCPI2_95)
 vld $vr0, $a0, %pc_lo12(.LCPI2_95)
- lu52i.d $a0, $s1, 1709
+ lu52i.d $a0, $s0, 1709
 vreplgr2vr.d $vr1, $a0
- vst $vr1, $sp, 128
+ vst $vr1, $sp, 112
 vssrarni.hu.w $vr0, $vr0, 14
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 461
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.hu.w $vr0, $vr0, 19
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 467
 move $a3, $fp
@@ -2172,12 +2164,12 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_96)
 pcalau12i $a0, %pc_hi20(.LCPI2_97)
 vld $vr1, $a0, %pc_lo12(.LCPI2_97)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vrepli.b $vr0, 99
 vssrarni.hu.w $vr0, $vr1, 30
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 473
 move $a3, $fp
@@ -2189,11 +2181,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_99)
 pcalau12i $a0, %pc_hi20(.LCPI2_100)
 vld $vr2, $a0, %pc_lo12(.LCPI2_100)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.hu.w $vr2, $vr1, 10
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 479
 move $a3, $fp
@@ -2201,14 +2193,13 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_101)
 vld $vr0, $a0, %pc_lo12(.LCPI2_101)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
- lu32i.d $s0, -65536
- vreplgr2vr.d $vr1, $s0
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
+ vldi $vr1, -1600
 vssrarni.hu.w $vr0, $vr1, 9
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 485
 move $a3, $fp
@@ -2218,23 +2209,23 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_102)
 pcalau12i $a0, %pc_hi20(.LCPI2_103)
 vld $vr1, $a0, %pc_lo12(.LCPI2_103)
- vld $vr2, $sp, 48 # 16-byte Folded Reload
- vst $vr2, $sp, 128
+ vld $vr2, $sp, 32 # 16-byte Folded Reload
+ vst $vr2, $sp, 112
 vssrarni.wu.d $vr1, $vr0, 63
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 491
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.wu.d $vr0, $vr0, 8
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 497
 move $a3, $fp
@@ -2246,11 +2237,11 @@ main: # @main
 vld $vr1, $a0, %pc_lo12(.LCPI2_105)
 pcalau12i $a0, %pc_hi20(.LCPI2_106)
 vld $vr2, $a0, %pc_lo12(.LCPI2_106)
- vst $vr0, $sp, 128
+ vst $vr0, $sp, 112
 vssrarni.wu.d $vr2, $vr1, 27
- vst $vr2, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr2, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 503
 move $a3, $fp
@@ -2258,12 +2249,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_107)
 vld $vr0, $a0, %pc_lo12(.LCPI2_107)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.wu.d $vr0, $vr0, 44
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 509
 move $a3, $fp
@@ -2273,23 +2264,23 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_108)
 pcalau12i $a0, %pc_hi20(.LCPI2_109)
 vld $vr1, $a0, %pc_lo12(.LCPI2_109)
- vld $vr2, $sp, 80 # 16-byte Folded Reload
- vst $vr2, $sp, 128
+ vld $vr2, $sp, 64 # 16-byte Folded Reload
+ vst $vr2, $sp, 112
 vssrarni.wu.d $vr1, $vr0, 16
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 515
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.du.q $vr0, $vr0, 27
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 521
 move $a3, $fp
@@ -2297,12 +2288,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_110)
 vld $vr0, $a0, %pc_lo12(.LCPI2_110)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.du.q $vr0, $vr1, 114
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 527
 move $a3, $fp
@@ -2310,24 +2301,24 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_111)
 vld $vr0, $a0, %pc_lo12(.LCPI2_111)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.du.q $vr1, $vr0, 92
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 533
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vrepli.h $vr0, -4
 vssrarni.du.q $vr0, $vr1, 48
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 539
 move $a3, $fp
@@ -2337,23 +2328,23 @@ main: # @main
 vld $vr0, $a0, %pc_lo12(.LCPI2_112)
 pcalau12i $a0, %pc_hi20(.LCPI2_113)
 vld $vr1, $a0, %pc_lo12(.LCPI2_113)
- vld $vr2, $sp, 112 # 16-byte Folded Reload
- vst $vr2, $sp, 128
+ vld $vr2, $sp, 96 # 16-byte Folded Reload
+ vst $vr2, $sp, 112
 vssrarni.du.q $vr1, $vr0, 96
- vst $vr1, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr1, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 545
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.du.q $vr0, $vr0, 88
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 551
 move $a3, $fp
@@ -2361,45 +2352,44 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_114)
 vld $vr0, $a0, %pc_lo12(.LCPI2_114)
- vld $vr1, $sp, 112 # 16-byte Folded Reload
- vst $vr1, $sp, 128
+ vld $vr1, $sp, 96 # 16-byte Folded Reload
+ vst $vr1, $sp, 112
 vssrarni.du.q $vr0, $vr0, 113
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 557
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.du.q $vr0, $vr0, 37
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 563
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 112 # 16-byte Folded Reload
- vst $vr0, $sp, 128
+ vld $vr0, $sp, 96 # 16-byte Folded Reload
+ vst $vr0, $sp, 112
 vssrarni.du.q $vr0, $vr0, 7
- vst $vr0, $sp, 144
- addi.d $a0, $sp, 128
- addi.d $a1, $sp, 144
+ vst $vr0, $sp, 128
+ addi.d $a0, $sp, 112
+ addi.d $a1, $sp, 128
 ori $a2, $zero, 16
 ori $a4, $zero, 569
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s1, $sp, 160 # 8-byte Folded Reload
- ld.d $s0, $sp, 168 # 8-byte Folded Reload
- ld.d $fp, $sp, 176 # 8-byte Folded Reload
- ld.d $ra, $sp, 184 # 8-byte Folded Reload
- addi.d $sp, $sp, 192
+ ld.d $s0, $sp, 152 # 8-byte Folded Reload
+ ld.d $fp, $sp, 160 # 8-byte Folded Reload
+ ld.d $ra, $sp, 168 # 8-byte Folded Reload
+ addi.d $sp, $sp, 176
 ret
.Lfunc_end2:
 .size main, .Lfunc_end2-main
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlni.dir/lsx-vssrlni.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlni.dir/lsx-vssrlni.s
index 17f53e75..05cd665b 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlni.dir/lsx-vssrlni.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlni.dir/lsx-vssrlni.s
@@ -794,7 +794,6 @@ main: # @main
 st.d $ra, $sp, 248 # 8-byte Folded Spill
 st.d $fp, $sp, 240 # 8-byte Folded Spill
 st.d $s0, $sp, 232 # 8-byte Folded Spill
- st.d $s1, $sp, 224 # 8-byte Folded Spill
 vrepli.b $vr0, 0
 vst $vr0, $sp, 176 # 16-byte Folded Spill
 vst $vr0, $sp, 192
@@ -1338,14 +1337,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- vld $vr0, $sp, 32 # 16-byte Folded Reload
- vst $vr0, $sp, 192
 pcalau12i $a0, %pc_hi20(.LCPI2_54)
 vld $vr0, $a0, %pc_lo12(.LCPI2_54)
- lu12i.w $a0, 15
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr1, $a0
- vst $vr1, $sp, 32 # 16-byte Folded Spill
+ vld $vr1, $sp, 32 # 16-byte Folded Reload
+ vst $vr1, $sp, 192
+ vldi $vr1, -2305
 vssrlni.d.q $vr0, $vr1, 32
 vst $vr0, $sp, 208
 addi.d $a0, $sp, 192
@@ -1355,10 +1351,7 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- ori $s1, $zero, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr0, $a0
+ vldi $vr0, -1552
 vst $vr0, $sp, 192
 vld $vr0, $sp, 112 # 16-byte Folded Reload
 vssrlni.bu.h $vr0, $vr0, 1
@@ -1383,10 +1376,11 @@ main: # @main
 move $a3, $fp
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
- pcalau12i $a0, %pc_hi20(.LCPI2_56)
- vld $vr0, $a0, %pc_lo12(.LCPI2_56)
- lu32i.d $s1, 65537
- vreplgr2vr.d $vr1, $s1
+ ori $a0, $zero, 0
+ pcalau12i $a1, %pc_hi20(.LCPI2_56)
+ vld $vr0, $a1, %pc_lo12(.LCPI2_56)
+ lu32i.d $a0, 65537
+ vreplgr2vr.d $vr1, $a0
 vst $vr1, $sp, 192
 vssrlni.bu.h $vr0, $vr0, 15
 vst $vr0, $sp, 208
@@ -1543,7 +1537,7 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 160 # 16-byte Folded Reload
 vst $vr0, $sp, 192
- vld $vr0, $sp, 32 # 16-byte Folded Reload
+ vldi $vr0, -2305
 vld $vr1, $sp, 176 # 16-byte Folded Reload
 vssrlni.hu.w $vr0, $vr1, 0
 vst $vr0, $sp, 208
@@ -1837,7 +1831,6 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s1, $sp, 224 # 8-byte Folded Reload
 ld.d $s0, $sp, 232 # 8-byte Folded Reload
 ld.d $fp, $sp, 240 # 8-byte Folded Reload
 ld.d $ra, $sp, 248 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrn.dir/lsx-vssrlrn.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrn.dir/lsx-vssrlrn.s
index e13a4ba1..7d619cd1 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrn.dir/lsx-vssrlrn.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrn.dir/lsx-vssrlrn.s
@@ -1009,8 +1009,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_44)
 vld $vr1, $a0, %pc_lo12(.LCPI2_44)
 vst $vr0, $sp, 96
- lu12i.w $a0, 1
- vreplgr2vr.h $vr0, $a0
+ vldi $vr0, -2800
 vssrlrn.hu.w $vr0, $vr1, $vr0
 vst $vr0, $sp, 112
 addi.d $a0, $sp, 96
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrni.dir/lsx-vssrlrni.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrni.dir/lsx-vssrlrni.s
index 09b80416..dafaa806 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrni.dir/lsx-vssrlrni.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssrlrni.dir/lsx-vssrlrni.s
@@ -913,18 +913,14 @@ main: # @main
 st.d $ra, $sp, 184 # 8-byte Folded Spill
 st.d $fp, $sp, 176 # 8-byte Folded Spill
 st.d $s0, $sp, 168 # 8-byte Folded Spill
- st.d $s1, $sp, 160 # 8-byte Folded Spill
 pcalau12i $a0, %pc_hi20(.LCPI2_0)
 vld $vr0, $a0, %pc_lo12(.LCPI2_0)
- vst $vr0, $sp, 128
 pcalau12i $a0, %pc_hi20(.LCPI2_1)
- vld $vr0, $a0, %pc_lo12(.LCPI2_1)
- ori $s1, $zero, 0
- ori $a0, $zero, 0
- lu32i.d $a0, -1
- vreplgr2vr.d $vr1, $a0
- vssrlrni.bu.h $vr0, $vr1, 9
- vst $vr0, $sp, 144
+ vld $vr1, $a0, %pc_lo12(.LCPI2_1)
+ vst $vr0, $sp, 128
+ vldi $vr0, -1552
+ vssrlrni.bu.h $vr1, $vr0, 9
+ vst $vr1, $sp, 144
 pcalau12i $a0, %pc_hi20(.L.str.5)
 addi.d $fp, $a0, %pc_lo12(.L.str.5)
 addi.d $a0, $sp, 128
@@ -952,14 +948,12 @@ main: # @main
 jirl $ra, $ra, 0
 pcalau12i $a0, %pc_hi20(.LCPI2_4)
 vld $vr0, $a0, %pc_lo12(.LCPI2_4)
- vst $vr0, $sp, 128
 pcalau12i $a0, %pc_hi20(.LCPI2_5)
- vld $vr0, $a0, %pc_lo12(.LCPI2_5)
- lu12i.w $a0, 511
- ori $a0, $a0, 4095
- vreplgr2vr.w $vr1, $a0
- vssrlrni.bu.h $vr1, $vr0, 1
- vst $vr1, $sp, 144
+ vld $vr1, $a0, %pc_lo12(.LCPI2_5)
+ vst $vr0, $sp, 128
+ vldi $vr0, -2273
+ vssrlrni.bu.h $vr0, $vr1, 1
+ vst $vr0, $sp, 144
 addi.d $a0, $sp, 128
 addi.d $a1, $sp, 144
 ori $a2, $zero, 16
@@ -1300,8 +1294,9 @@ main: # @main
 jirl $ra, $ra, 0
 vld $vr0, $sp, 48 # 16-byte Folded Reload
 vst $vr0, $sp, 128
- lu32i.d $s1, -2
- lu52i.d $a0, $s1, 2047
+ ori $a0, $zero, 0
+ lu32i.d $a0, -2
+ lu52i.d $a0, $a0, 2047
 vreplgr2vr.d $vr0, $a0
 vld $vr1, $sp, 112 # 16-byte Folded Reload
 vssrlrni.du.q $vr1, $vr0, 58
@@ -1842,7 +1837,6 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 move $a0, $zero
- ld.d $s1, $sp, 160 # 8-byte Folded Reload
 ld.d $s0, $sp, 168 # 8-byte Folded Reload
 ld.d $fp, $sp, 176 # 8-byte Folded Reload
 ld.d $ra, $sp, 184 # 8-byte Folded Reload
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssub-2.dir/lsx-vssub-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssub-2.dir/lsx-vssub-2.s
index ea6f6856..513a2377 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssub-2.dir/lsx-vssub-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vssub-2.dir/lsx-vssub-2.s
@@ -1073,10 +1073,10 @@ main: # @main
 pcaddu18i $ra, %call36(check_lsx_out)
 jirl $ra, $ra, 0
 lu12i.w $a0, 1
- ori $a1, $a0, 514
- vreplgr2vr.h $vr0, $a1
- vst $vr0, $sp, 112
+ ori $a0, $a0, 514
 vreplgr2vr.h $vr0, $a0
+ vst $vr0, $sp, 112
+ vldi $vr0, -2800
 vrepli.b $vr1, -2
 vst $vr1, $sp, 32 # 16-byte Folded Spill
 vssub.b $vr0, $vr0, $vr1
diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwev-2.dir/lsx-vsubwev-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwev-2.dir/lsx-vsubwev-2.s
index badec7ab..beaf7957 100644
--- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwev-2.dir/lsx-vsubwev-2.s
+++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwev-2.dir/lsx-vsubwev-2.s
@@ -613,10 +613,9 @@ check_lsx_fp_out: # @check_lsx_fp_out
 .type main,@function
main: # @main
# %bb.0:
- addi.d $sp, $sp, -160
- st.d $ra, $sp, 152 # 8-byte Folded Spill
- st.d $fp, $sp, 144 # 8-byte Folded Spill
- st.d $s0, $sp, 136 # 8-byte Folded Spill
+ addi.d $sp, $sp, -144
+ st.d $ra, $sp, 136 # 8-byte Folded Spill
+ st.d $fp, $sp, 128 # 8-byte Folded Spill
 vrepli.b $vr0, 0
 vst $vr0, $sp, 80 # 16-byte Folded Spill
 vst $vr0, $sp, 96
@@ -665,8 +664,7 @@ main: # @main
 pcalau12i $a0, %pc_hi20(.LCPI2_3)
 vld $vr1, $a0, %pc_lo12(.LCPI2_3)
 vst
$vr0, $sp, 96 - lu12i.w $a0, 1 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2800 vsubwev.h.bu $vr0, $vr0, $vr1 vst $vr0, $sp, 112 addi.d $a0, $sp, 96 @@ -849,9 +847,7 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - ori $a0, $a0, 4095 - vreplgr2vr.w $vr0, $a0 + vldi $vr0, -2305 vst $vr0, $sp, 96 vld $vr0, $sp, 64 # 16-byte Folded Reload vld $vr1, $sp, 80 # 16-byte Folded Reload @@ -973,7 +969,6 @@ main: # @main jirl $ra, $ra, 0 vld $vr1, $sp, 80 # 16-byte Folded Reload vst $vr1, $sp, 96 - ori $s0, $zero, 0 ori $a0, $zero, 0 lu32i.d $a0, -1 lu52i.d $a0, $a0, 3 @@ -1251,8 +1246,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_66) vld $vr0, $a0, %pc_lo12(.LCPI2_66) vst $vr0, $sp, 96 - lu32i.d $s0, -65536 - vreplgr2vr.d $vr0, $s0 + vldi $vr0, -1600 vld $vr1, $sp, 80 # 16-byte Folded Reload vsubwev.q.du $vr0, $vr1, $vr0 vst $vr0, $sp, 112 @@ -1264,10 +1258,9 @@ main: # @main pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 move $a0, $zero - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $ra, $sp, 136 # 8-byte Folded Reload + addi.d $sp, $sp, 144 ret .Lfunc_end2: .size main, .Lfunc_end2-main diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-1.dir/lsx-vsubwod-1.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-1.dir/lsx-vsubwod-1.s index 0c00b994..906d7b49 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-1.dir/lsx-vsubwod-1.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-1.dir/lsx-vsubwod-1.s @@ -911,8 +911,7 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI2_39) vld $vr1, $a0, %pc_lo12(.LCPI2_39) vst $vr0, $sp, 64 - lu12i.w $a0, 1 - vreplgr2vr.h $vr0, $a0 + vldi $vr0, -2800 vsubwod.q.d $vr0, $vr1, $vr0 vst $vr0, $sp, 80 addi.d $a0, $sp, 64 diff --git a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-2.dir/lsx-vsubwod-2.s b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-2.dir/lsx-vsubwod-2.s index 2ad58d7a..a4c6ff29 100644 --- a/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-2.dir/lsx-vsubwod-2.s +++ b/results/SingleSource/UnitTests/Vector/LSX/CMakeFiles/Vector-LSX-lsx-vsubwod-2.dir/lsx-vsubwod-2.s @@ -617,11 +617,9 @@ main: # @main move $a3, $fp pcaddu18i $ra, %call36(check_lsx_out) jirl $ra, $ra, 0 - lu12i.w $a0, 15 - pcalau12i $a1, %pc_hi20(.LCPI2_11) - vld $vr0, $a1, %pc_lo12(.LCPI2_11) - ori $a0, $a0, 4095 - vreplgr2vr.d $vr1, $a0 + pcalau12i $a0, %pc_hi20(.LCPI2_11) + vld $vr0, $a0, %pc_lo12(.LCPI2_11) + vldi $vr1, -1789 vst $vr1, $sp, 80 vld $vr1, $sp, 64 # 16-byte Folded Reload vsubwod.w.hu $vr0, $vr0, $vr1 diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s index 1c399aa2..8d74ca6b 100644 --- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s +++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s @@ -1390,18 +1390,17 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception1 # %bb.0: - addi.d $sp, $sp, -192 - .cfi_def_cfa_offset 192 - st.d $ra, $sp, 184 # 8-byte Folded Spill - st.d $fp, $sp, 176 # 
8-byte Folded Spill - st.d $s0, $sp, 168 # 8-byte Folded Spill - st.d $s1, $sp, 160 # 8-byte Folded Spill - st.d $s2, $sp, 152 # 8-byte Folded Spill - st.d $s3, $sp, 144 # 8-byte Folded Spill - st.d $s4, $sp, 136 # 8-byte Folded Spill - st.d $s5, $sp, 128 # 8-byte Folded Spill - st.d $s6, $sp, 120 # 8-byte Folded Spill - st.d $s7, $sp, 112 # 8-byte Folded Spill + addi.d $sp, $sp, -176 + .cfi_def_cfa_offset 176 + st.d $ra, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 152 # 8-byte Folded Spill + st.d $s1, $sp, 144 # 8-byte Folded Spill + st.d $s2, $sp, 136 # 8-byte Folded Spill + st.d $s3, $sp, 128 # 8-byte Folded Spill + st.d $s4, $sp, 120 # 8-byte Folded Spill + st.d $s5, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1411,7 +1410,6 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto .cfi_offset 27, -56 .cfi_offset 28, -64 .cfi_offset 29, -72 - .cfi_offset 30, -80 move $fp, $a2 move $s1, $a1 move $s2, $a0 @@ -1462,38 +1460,38 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto .Ltmp181: # EH_LABEL # %bb.4: move $s0, $a0 - lu12i.w $s6, -524288 - lu52i.d $s5, $s6, 2047 - st.d $s5, $sp, 104 + lu12i.w $a0, -524288 + lu52i.d $s5, $a0, 2047 + st.d $s5, $sp, 96 lu12i.w $s4, -1 - ori $s7, $s4, 96 + ori $s6, $s4, 96 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) .p2align 4, , 16 .LBB1_5: # =>This Inner Loop Header: Depth=1 .Ltmp183: # EH_LABEL - addi.d $a0, $sp, 104 - addi.d $a2, $sp, 104 + addi.d $a0, $sp, 96 + addi.d $a2, $sp, 96 move $a1, $s3 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIiEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEiRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 .Ltmp184: # EH_LABEL # %bb.6: # %.noexc # in Loop: Header=BB1_5 Depth=1 - add.d $a1, $fp, $s7 - addi.d $s7, $s7, 4 + add.d $a1, $fp, $s6 + addi.d $s6, $s6, 4 stptr.w $a0, $a1, 4000 - bnez $s7, .LBB1_5 + bnez $s6, .LBB1_5 # %bb.7: - st.d $s5, $sp, 104 + st.d $s5, $sp, 96 ori $s5, $s4, 96 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) .p2align 4, , 16 .LBB1_8: # =>This Inner Loop Header: Depth=1 .Ltmp186: # EH_LABEL - addi.d $a0, $sp, 104 - addi.d $a2, $sp, 104 + addi.d $a0, $sp, 96 + addi.d $a2, $sp, 96 move $a1, $s3 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIiEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEiRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 @@ -1506,34 +1504,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto bnez $s5, .LBB1_8 # %bb.10: ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_58 # %bb.11: ld.d $a4, $s2, 24 .Ltmp189: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp190: # EH_LABEL # %bb.12: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_60 # %bb.13: ld.d $a4, $s1, 24 .Ltmp191: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - 
addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp192: # EH_LABEL @@ -1546,7 +1544,7 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvreplgr2vr.w $xr1, $s5 ori $a1, $zero, 3968 ori $a2, $zero, 4000 - xvreplgr2vr.w $xr0, $s6 + xvldi $xr0, -3200 .p2align 4, , 16 .LBB1_16: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -1565,34 +1563,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr0, $sp, 48 # 32-byte Folded Spill xvstx $xr0, $s0, $a0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_63 # %bb.18: ld.d $a4, $s2, 24 .Ltmp195: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp196: # EH_LABEL # %bb.19: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_65 # %bb.20: ld.d $a4, $s1, 24 .Ltmp197: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp198: # EH_LABEL @@ -1620,34 +1618,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvstx $xr0, $fp, $a0 xvstx $xr1, $s0, $a0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_68 # %bb.25: ld.d $a4, $s2, 24 .Ltmp201: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp202: # EH_LABEL # %bb.26: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_70 # %bb.27: ld.d $a4, $s1, 24 .Ltmp203: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp204: # EH_LABEL @@ -1675,34 +1673,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvstx $xr0, $fp, $a0 stptr.w $s5, $fp, 3992 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_73 # %bb.32: ld.d $a4, $s2, 24 .Ltmp207: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp208: # EH_LABEL # %bb.33: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_75 # %bb.34: ld.d $a4, $s1, 24 .Ltmp209: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp210: # EH_LABEL @@ -1730,34 +1728,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto 
xvstx $xr0, $fp, $a0 st.w $s5, $fp, 0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_78 # %bb.39: ld.d $a4, $s2, 24 .Ltmp213: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp214: # EH_LABEL # %bb.40: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_80 # %bb.41: ld.d $a4, $s1, 24 .Ltmp215: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp216: # EH_LABEL @@ -1785,34 +1783,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvstx $xr0, $fp, $a0 stptr.w $s5, $fp, 3996 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_83 # %bb.46: ld.d $a4, $s2, 24 .Ltmp219: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp220: # EH_LABEL # %bb.47: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_85 # %bb.48: ld.d $a4, $s1, 24 .Ltmp221: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp222: # EH_LABEL @@ -1841,34 +1839,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto stptr.w $s5, $fp, 3996 st.w $s5, $fp, 0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_88 # %bb.53: ld.d $a4, $s2, 24 .Ltmp225: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp226: # EH_LABEL # %bb.54: move $s2, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_90 # %bb.55: ld.d $a4, $s1, 24 .Ltmp227: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp228: # EH_LABEL @@ -1881,17 +1879,16 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto move $a0, $fp pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 - ld.d $s7, $sp, 112 # 8-byte Folded Reload - ld.d $s6, $sp, 120 # 8-byte Folded Reload - ld.d $s5, $sp, 128 # 8-byte Folded Reload - ld.d $s4, $sp, 136 # 8-byte Folded Reload - ld.d $s3, $sp, 144 # 8-byte Folded Reload - ld.d $s2, $sp, 152 # 8-byte Folded Reload - ld.d $s1, $sp, 160 # 8-byte Folded Reload - ld.d $s0, $sp, 168 # 8-byte Folded Reload - ld.d $fp, $sp, 176 # 8-byte Folded Reload - ld.d $ra, $sp, 184 # 8-byte Folded Reload - addi.d $sp, $sp, 192 + ld.d $s6, $sp, 104 # 8-byte Folded 
Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 168 # 8-byte Folded Reload + addi.d $sp, $sp, 176 ret .LBB1_58: .Ltmp270: # EH_LABEL @@ -2393,8 +2390,7 @@ _ZL19checkVectorFunctionIifEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvreplgr2vr.w $xr1, $s4 ori $a1, $zero, 3968 ori $a2, $zero, 4000 - lu12i.w $a3, 2048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3456 .p2align 4, , 16 .LBB2_10: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -3113,17 +3109,17 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception3 # %bb.0: - addi.d $sp, $sp, -208 - .cfi_def_cfa_offset 208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + .cfi_def_cfa_offset 192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -3185,15 +3181,15 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto # %bb.4: move $s0, $a0 lu12i.w $s5, 524280 - st.w $s5, $sp, 128 + st.w $s5, $sp, 112 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) move $s6, $zero .p2align 4, , 16 .LBB3_5: # =>This Inner Loop Header: Depth=1 .Ltmp363: # EH_LABEL - addi.d $a0, $sp, 128 - addi.d $a2, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a2, $sp, 112 move $a1, $s3 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIsEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEsRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 @@ -3204,7 +3200,7 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto addi.d $s6, $s6, 2 bne $s6, $s4, .LBB3_5 # %bb.7: - st.w $s5, $sp, 128 + st.w $s5, $sp, 112 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) move $s4, $zero @@ -3212,8 +3208,8 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto .p2align 4, , 16 .LBB3_8: # =>This Inner Loop Header: Depth=1 .Ltmp366: # EH_LABEL - addi.d $a0, $sp, 128 - addi.d $a2, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a2, $sp, 112 move $a1, $s3 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIsEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEsRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 @@ -3225,34 +3221,34 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto bne $s4, $s5, .LBB3_8 # %bb.10: ld.d $a0, $s2, 16 - st.d $fp, $sp, 128 - st.d $s0, 
$sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_46 # %bb.11: ld.d $a4, $s2, 24 .Ltmp369: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp370: # EH_LABEL # %bb.12: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_48 # %bb.13: ld.d $a4, $s1, 24 .Ltmp371: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp372: # EH_LABEL @@ -3264,8 +3260,7 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvreplgr2vr.h $xr1, $s4 xvst $xr1, $fp, 0 xvst $xr1, $fp, 32 - lu12i.w $a0, 8 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2688 xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr1, $fp, 64 @@ -3388,50 +3383,49 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr1, $sp, 32 # 32-byte Folded Spill xvst $xr1, $fp, 1952 xvst $xr0, $s0, 1920 - xvst $xr0, $sp, 80 # 32-byte Folded Spill + xvst $xr0, $sp, 64 # 32-byte Folded Spill xvst $xr0, $s0, 1952 - vreplgr2vr.h $vr1, $s4 - vreplgr2vr.h $vr0, $a0 + vreplgr2vr.h $vr0, $s4 + vst $vr0, $sp, 16 # 16-byte Folded Spill + vst $vr0, $fp, 1984 ori $a0, $zero, 1000 - st.w $a0, $sp, 116 + st.w $a0, $sp, 100 ld.d $a0, $s2, 16 - vst $vr1, $sp, 16 # 16-byte Folded Spill - vst $vr1, $fp, 1984 - vst $vr0, $sp, 64 # 16-byte Folded Spill + vldi $vr0, -2688 vst $vr0, $s0, 1984 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_51 # %bb.16: ld.d $a4, $s2, 24 .Ltmp375: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp376: # EH_LABEL # %bb.17: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_53 # %bb.18: ld.d $a4, $s1, 24 .Ltmp377: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp378: # EH_LABEL # %bb.19: bne $s3, $a0, .LBB3_55 # %bb.20: # %vector.body254 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $fp, 0 xvst $xr0, $fp, 32 xvld $xr1, $sp, 32 # 32-byte Folded Reload @@ -3557,46 +3551,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr0, $fp, 1952 xvst $xr1, $s0, 1920 xvst $xr1, $s0, 1952 + vldi $vr0, -2688 ori $a0, $zero, 1000 - st.w $a0, $sp, 116 + st.w $a0, $sp, 100 ld.d $a0, $s2, 16 - vld $vr0, $sp, 64 # 16-byte Folded Reload vst $vr0, $fp, 1984 vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $s0, 1984 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_56 # %bb.21: ld.d $a4, $s2, 24 .Ltmp381: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp382: # EH_LABEL # %bb.22: move $s3, $a0 ld.d 
$a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_58 # %bb.23: ld.d $a4, $s1, 24 .Ltmp383: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp384: # EH_LABEL # %bb.24: bne $s3, $a0, .LBB3_60 # %bb.25: # %vector.body271 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr0, $fp, 0 @@ -3721,46 +3715,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr0, $s0, 1952 xvst $xr0, $fp, 1920 xvst $xr0, $fp, 1952 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vldi $vr0, -2688 vst $vr0, $s0, 1984 ori $a0, $zero, 1000 - st.w $a0, $sp, 116 + st.w $a0, $sp, 100 ld.d $a0, $s2, 16 vst $vr0, $fp, 1984 st.h $s4, $fp, 1996 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_61 # %bb.26: ld.d $a4, $s2, 24 .Ltmp387: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp388: # EH_LABEL # %bb.27: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_63 # %bb.28: ld.d $a4, $s1, 24 .Ltmp389: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp390: # EH_LABEL # %bb.29: bne $s3, $a0, .LBB3_65 # %bb.30: # %vector.body288 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr0, $fp, 0 @@ -3885,46 +3879,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr0, $s0, 1952 xvst $xr0, $fp, 1920 xvst $xr0, $fp, 1952 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vldi $vr0, -2688 vst $vr0, $s0, 1984 ori $a0, $zero, 1000 - st.w $a0, $sp, 116 + st.w $a0, $sp, 100 ld.d $a0, $s2, 16 vst $vr0, $fp, 1984 st.h $s4, $fp, 0 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_66 # %bb.31: ld.d $a4, $s2, 24 .Ltmp393: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp394: # EH_LABEL # %bb.32: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_68 # %bb.33: ld.d $a4, $s1, 24 .Ltmp395: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp396: # EH_LABEL # %bb.34: bne $s3, $a0, .LBB3_70 # %bb.35: # %vector.body305 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr0, $fp, 0 @@ -4049,46 +4043,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr0, $s0, 1952 xvst $xr0, $fp, 1920 xvst $xr0, $fp, 1952 - vld $vr0, $sp, 64 # 16-byte 
Folded Reload + vldi $vr0, -2688 vst $vr0, $s0, 1984 ori $a0, $zero, 1000 - st.w $a0, $sp, 116 + st.w $a0, $sp, 100 ld.d $a0, $s2, 16 vst $vr0, $fp, 1984 st.h $s4, $fp, 1998 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_71 # %bb.36: ld.d $a4, $s2, 24 .Ltmp399: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp400: # EH_LABEL # %bb.37: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_73 # %bb.38: ld.d $a4, $s1, 24 .Ltmp401: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp402: # EH_LABEL # %bb.39: bne $s3, $a0, .LBB3_75 # %bb.40: # %vector.body322 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr0, $fp, 0 @@ -4212,41 +4206,41 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvst $xr0, $s0, 1920 xvst $xr0, $s0, 1952 xvst $xr0, $fp, 1920 - vld $vr1, $sp, 64 # 16-byte Folded Reload - vst $vr1, $fp, 1984 + xvst $xr0, $fp, 1952 + vldi $vr0, -2688 + vst $vr0, $fp, 1984 st.h $s4, $fp, 1998 st.h $s4, $fp, 0 ori $a0, $zero, 1000 - st.w $a0, $sp, 116 - ld.d $a0, $s2, 16 - xvst $xr0, $fp, 1952 - vst $vr1, $s0, 1984 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 - beqz $a0, .LBB3_76 + ld.d $a1, $s2, 16 + st.w $a0, $sp, 100 + vst $vr0, $s0, 1984 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 + beqz $a1, .LBB3_76 # %bb.41: ld.d $a4, $s2, 24 .Ltmp405: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp406: # EH_LABEL # %bb.42: move $s2, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.w $a1, $sp, 116 + st.w $a1, $sp, 100 beqz $a0, .LBB3_78 # %bb.43: ld.d $a4, $s1, 24 .Ltmp407: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 116 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 100 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp408: # EH_LABEL @@ -4259,16 +4253,16 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto move $a0, $fp pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 - ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s5, $sp, 144 # 8-byte Folded Reload - ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s3, $sp, 160 # 8-byte Folded Reload - ld.d $s2, $sp, 168 # 8-byte Folded Reload - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $s0, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $ra, $sp, 200 # 8-byte Folded Reload - addi.d $sp, $sp, 208 + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 ret .LBB3_46: .Ltmp450: # EH_LABEL @@ -5620,8 +5614,7 @@ 
_ZL19checkVectorFunctionIjfEvSt8functionIFT_PT0_S3_jEES5_PKc: # @_ZL19checkVecto xvreplgr2vr.w $xr1, $s4 ori $a1, $zero, 3968 ori $a2, $zero, 4000 - lu12i.w $a3, 2048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3456 .p2align 4, , 16 .LBB5_10: # %vector.body # =>This Inner Loop Header: Depth=1 diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s index 1583412b..2cf1d443 100644 --- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s +++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s @@ -2997,18 +2997,17 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception1 # %bb.0: - addi.d $sp, $sp, -192 - .cfi_def_cfa_offset 192 - st.d $ra, $sp, 184 # 8-byte Folded Spill - st.d $fp, $sp, 176 # 8-byte Folded Spill - st.d $s0, $sp, 168 # 8-byte Folded Spill - st.d $s1, $sp, 160 # 8-byte Folded Spill - st.d $s2, $sp, 152 # 8-byte Folded Spill - st.d $s3, $sp, 144 # 8-byte Folded Spill - st.d $s4, $sp, 136 # 8-byte Folded Spill - st.d $s5, $sp, 128 # 8-byte Folded Spill - st.d $s6, $sp, 120 # 8-byte Folded Spill - st.d $s7, $sp, 112 # 8-byte Folded Spill + addi.d $sp, $sp, -176 + .cfi_def_cfa_offset 176 + st.d $ra, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 152 # 8-byte Folded Spill + st.d $s1, $sp, 144 # 8-byte Folded Spill + st.d $s2, $sp, 136 # 8-byte Folded Spill + st.d $s3, $sp, 128 # 8-byte Folded Spill + st.d $s4, $sp, 120 # 8-byte Folded Spill + st.d $s5, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -3018,7 +3017,6 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec .cfi_offset 27, -56 .cfi_offset 28, -64 .cfi_offset 29, -72 - .cfi_offset 30, -80 move $fp, $a2 move $s1, $a1 move $s2, $a0 @@ -3069,38 +3067,38 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec .Ltmp406: # EH_LABEL # %bb.4: move $s0, $a0 - lu12i.w $s6, -524288 - lu52i.d $s5, $s6, 2047 - st.d $s5, $sp, 104 + lu12i.w $a0, -524288 + lu52i.d $s5, $a0, 2047 + st.d $s5, $sp, 96 lu12i.w $s4, -1 - ori $s7, $s4, 96 + ori $s6, $s4, 96 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) .p2align 4, , 16 .LBB1_5: # =>This Inner Loop Header: Depth=1 .Ltmp408: # EH_LABEL - addi.d $a0, $sp, 104 - addi.d $a2, $sp, 104 + addi.d $a0, $sp, 96 + addi.d $a2, $sp, 96 move $a1, $s3 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIiEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEiRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 .Ltmp409: # EH_LABEL # %bb.6: # %.noexc # in Loop: Header=BB1_5 Depth=1 - add.d $a1, $fp, $s7 - addi.d $s7, $s7, 4 + add.d $a1, $fp, $s6 + addi.d $s6, $s6, 4 stptr.w $a0, $a1, 4000 - bnez $s7, .LBB1_5 + bnez $s6, .LBB1_5 # %bb.7: - st.d $s5, $sp, 104 + st.d $s5, $sp, 96 ori $s5, $s4, 96 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) .p2align 4, , 16 .LBB1_8: # =>This Inner Loop Header: Depth=1 .Ltmp411: # EH_LABEL - addi.d $a0, $sp, 104 - addi.d $a2, $sp, 104 + addi.d $a0, $sp, 96 + addi.d $a2, $sp, 96 move $a1, $s3 pcaddu18i $ra, 
%call36(_ZNSt24uniform_int_distributionIiEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEiRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 @@ -3113,34 +3111,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec bnez $s5, .LBB1_8 # %bb.10: ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_58 # %bb.11: ld.d $a4, $s2, 24 .Ltmp414: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp415: # EH_LABEL # %bb.12: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_60 # %bb.13: ld.d $a4, $s1, 24 .Ltmp416: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp417: # EH_LABEL @@ -3153,7 +3151,7 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvreplgr2vr.w $xr1, $s5 ori $a1, $zero, 3968 ori $a2, $zero, 4000 - xvreplgr2vr.w $xr0, $s6 + xvldi $xr0, -3200 .p2align 4, , 16 .LBB1_16: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -3172,34 +3170,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvst $xr0, $sp, 48 # 32-byte Folded Spill xvstx $xr0, $s0, $a0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_63 # %bb.18: ld.d $a4, $s2, 24 .Ltmp420: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp421: # EH_LABEL # %bb.19: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_65 # %bb.20: ld.d $a4, $s1, 24 .Ltmp422: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp423: # EH_LABEL @@ -3227,34 +3225,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvstx $xr0, $fp, $a0 xvstx $xr1, $s0, $a0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_68 # %bb.25: ld.d $a4, $s2, 24 .Ltmp426: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp427: # EH_LABEL # %bb.26: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_70 # %bb.27: ld.d $a4, $s1, 24 .Ltmp428: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp429: # EH_LABEL @@ -3282,34 +3280,34 @@ 
_ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvstx $xr0, $fp, $a0 stptr.w $s5, $fp, 3992 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_73 # %bb.32: ld.d $a4, $s2, 24 .Ltmp432: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp433: # EH_LABEL # %bb.33: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_75 # %bb.34: ld.d $a4, $s1, 24 .Ltmp434: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp435: # EH_LABEL @@ -3337,34 +3335,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvstx $xr0, $fp, $a0 st.w $s5, $fp, 0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_78 # %bb.39: ld.d $a4, $s2, 24 .Ltmp438: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp439: # EH_LABEL # %bb.40: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_80 # %bb.41: ld.d $a4, $s1, 24 .Ltmp440: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp441: # EH_LABEL @@ -3392,34 +3390,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvstx $xr0, $fp, $a0 stptr.w $s5, $fp, 3996 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_83 # %bb.46: ld.d $a4, $s2, 24 .Ltmp444: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp445: # EH_LABEL # %bb.47: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_85 # %bb.48: ld.d $a4, $s1, 24 .Ltmp446: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp447: # EH_LABEL @@ -3448,34 +3446,34 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec stptr.w $s5, $fp, 3996 st.w $s5, $fp, 0 ld.d $a0, $s2, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_88 # %bb.53: ld.d $a4, $s2, 24 .Ltmp450: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp451: # EH_LABEL # %bb.54: move $s2, $a0 ld.d 
$a0, $s1, 16 - st.d $fp, $sp, 104 - st.d $s0, $sp, 96 + st.d $fp, $sp, 96 + st.d $s0, $sp, 88 ori $a1, $zero, 1000 - st.w $a1, $sp, 92 + st.w $a1, $sp, 84 beqz $a0, .LBB1_90 # %bb.55: ld.d $a4, $s1, 24 .Ltmp452: # EH_LABEL - addi.d $a1, $sp, 104 - addi.d $a2, $sp, 96 - addi.d $a3, $sp, 92 + addi.d $a1, $sp, 96 + addi.d $a2, $sp, 88 + addi.d $a3, $sp, 84 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp453: # EH_LABEL @@ -3488,17 +3486,16 @@ _ZL19checkVectorFunctionIiiEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec move $a0, $fp pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 - ld.d $s7, $sp, 112 # 8-byte Folded Reload - ld.d $s6, $sp, 120 # 8-byte Folded Reload - ld.d $s5, $sp, 128 # 8-byte Folded Reload - ld.d $s4, $sp, 136 # 8-byte Folded Reload - ld.d $s3, $sp, 144 # 8-byte Folded Reload - ld.d $s2, $sp, 152 # 8-byte Folded Reload - ld.d $s1, $sp, 160 # 8-byte Folded Reload - ld.d $s0, $sp, 168 # 8-byte Folded Reload - ld.d $fp, $sp, 176 # 8-byte Folded Reload - ld.d $ra, $sp, 184 # 8-byte Folded Reload - addi.d $sp, $sp, 192 + ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 168 # 8-byte Folded Reload + addi.d $sp, $sp, 176 ret .LBB1_58: .Ltmp495: # EH_LABEL @@ -4000,8 +3997,7 @@ _ZL19checkVectorFunctionIifEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvreplgr2vr.w $xr1, $s4 ori $a1, $zero, 3968 ori $a2, $zero, 4000 - lu12i.w $a3, 2048 - xvreplgr2vr.w $xr0, $a3 + xvldi $xr0, -3456 .p2align 4, , 16 .LBB2_10: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -4720,17 +4716,17 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception3 # %bb.0: - addi.d $sp, $sp, -208 - .cfi_def_cfa_offset 208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + .cfi_def_cfa_offset 192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -4792,15 +4788,15 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec # %bb.4: move $s0, $a0 lu12i.w $s5, 524280 - st.w $s5, $sp, 128 + st.w $s5, $sp, 112 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) move $s6, $zero .p2align 4, , 16 .LBB3_5: # =>This Inner Loop Header: Depth=1 .Ltmp588: # EH_LABEL - addi.d $a0, $sp, 128 - addi.d $a2, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a2, $sp, 112 move $a1, $s3 pcaddu18i $ra, 
%call36(_ZNSt24uniform_int_distributionIsEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEsRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 @@ -4811,7 +4807,7 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec addi.d $s6, $s6, 2 bne $s6, $s4, .LBB3_5 # %bb.7: - st.w $s5, $sp, 128 + st.w $s5, $sp, 112 pcalau12i $a0, %pc_hi20(_ZL3rng) addi.d $s3, $a0, %pc_lo12(_ZL3rng) move $s4, $zero @@ -4819,8 +4815,8 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec .p2align 4, , 16 .LBB3_8: # =>This Inner Loop Header: Depth=1 .Ltmp591: # EH_LABEL - addi.d $a0, $sp, 128 - addi.d $a2, $sp, 128 + addi.d $a0, $sp, 112 + addi.d $a2, $sp, 112 move $a1, $s3 pcaddu18i $ra, %call36(_ZNSt24uniform_int_distributionIsEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEsRT_RKNS0_10param_typeE) jirl $ra, $ra, 0 @@ -4832,34 +4828,34 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec bne $s4, $s5, .LBB3_8 # %bb.10: ld.d $a0, $s2, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.h $a1, $sp, 118 + st.h $a1, $sp, 102 beqz $a0, .LBB3_46 # %bb.11: ld.d $a4, $s2, 24 .Ltmp594: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp595: # EH_LABEL # %bb.12: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.h $a1, $sp, 118 + st.h $a1, $sp, 102 beqz $a0, .LBB3_48 # %bb.13: ld.d $a4, $s1, 24 .Ltmp596: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp597: # EH_LABEL @@ -4871,8 +4867,7 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvreplgr2vr.h $xr1, $s4 xvst $xr1, $fp, 0 xvst $xr1, $fp, 32 - lu12i.w $a0, 8 - xvreplgr2vr.h $xr0, $a0 + xvldi $xr0, -2688 xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr1, $fp, 64 @@ -4995,50 +4990,49 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvst $xr1, $sp, 32 # 32-byte Folded Spill xvst $xr1, $fp, 1952 xvst $xr0, $s0, 1920 - xvst $xr0, $sp, 80 # 32-byte Folded Spill + xvst $xr0, $sp, 64 # 32-byte Folded Spill xvst $xr0, $s0, 1952 - vreplgr2vr.h $vr1, $s4 - vreplgr2vr.h $vr0, $a0 + vreplgr2vr.h $vr0, $s4 + vst $vr0, $sp, 16 # 16-byte Folded Spill + vst $vr0, $fp, 1984 ori $a0, $zero, 1000 - st.h $a0, $sp, 118 + st.h $a0, $sp, 102 ld.d $a0, $s2, 16 - vst $vr1, $sp, 16 # 16-byte Folded Spill - vst $vr1, $fp, 1984 - vst $vr0, $sp, 64 # 16-byte Folded Spill + vldi $vr0, -2688 vst $vr0, $s0, 1984 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_51 # %bb.16: ld.d $a4, $s2, 24 .Ltmp600: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp601: # EH_LABEL # %bb.17: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.h $a1, $sp, 118 + st.h $a1, $sp, 102 beqz $a0, .LBB3_53 # %bb.18: 
ld.d $a4, $s1, 24 .Ltmp602: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp603: # EH_LABEL # %bb.19: bne $s3, $a0, .LBB3_55 # %bb.20: # %vector.body254 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $fp, 0 xvst $xr0, $fp, 32 xvld $xr1, $sp, 32 # 32-byte Folded Reload @@ -5164,46 +5158,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvst $xr0, $fp, 1952 xvst $xr1, $s0, 1920 xvst $xr1, $s0, 1952 + vldi $vr0, -2688 ori $a0, $zero, 1000 - st.h $a0, $sp, 118 + st.h $a0, $sp, 102 ld.d $a0, $s2, 16 - vld $vr0, $sp, 64 # 16-byte Folded Reload vst $vr0, $fp, 1984 vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $s0, 1984 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_56 # %bb.21: ld.d $a4, $s2, 24 .Ltmp606: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp607: # EH_LABEL # %bb.22: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.h $a1, $sp, 118 + st.h $a1, $sp, 102 beqz $a0, .LBB3_58 # %bb.23: ld.d $a4, $s1, 24 .Ltmp608: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp609: # EH_LABEL # %bb.24: bne $s3, $a0, .LBB3_60 # %bb.25: # %vector.body271 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr0, $fp, 0 @@ -5328,46 +5322,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvst $xr0, $s0, 1952 xvst $xr0, $fp, 1920 xvst $xr0, $fp, 1952 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vldi $vr0, -2688 vst $vr0, $s0, 1984 ori $a0, $zero, 1000 - st.h $a0, $sp, 118 + st.h $a0, $sp, 102 ld.d $a0, $s2, 16 vst $vr0, $fp, 1984 st.h $s4, $fp, 1996 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 beqz $a0, .LBB3_61 # %bb.26: ld.d $a4, $s2, 24 .Ltmp612: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s2 jirl $ra, $a4, 0 .Ltmp613: # EH_LABEL # %bb.27: move $s3, $a0 ld.d $a0, $s1, 16 - st.d $fp, $sp, 128 - st.d $s0, $sp, 120 + st.d $fp, $sp, 112 + st.d $s0, $sp, 104 ori $a1, $zero, 1000 - st.h $a1, $sp, 118 + st.h $a1, $sp, 102 beqz $a0, .LBB3_63 # %bb.28: ld.d $a4, $s1, 24 .Ltmp614: # EH_LABEL - addi.d $a1, $sp, 128 - addi.d $a2, $sp, 120 - addi.d $a3, $sp, 118 + addi.d $a1, $sp, 112 + addi.d $a2, $sp, 104 + addi.d $a3, $sp, 102 move $a0, $s1 jirl $ra, $a4, 0 .Ltmp615: # EH_LABEL # %bb.29: bne $s3, $a0, .LBB3_65 # %bb.30: # %vector.body288 - xvld $xr0, $sp, 80 # 32-byte Folded Reload + xvld $xr0, $sp, 64 # 32-byte Folded Reload xvst $xr0, $s0, 0 xvst $xr0, $s0, 32 xvst $xr0, $fp, 0 @@ -5492,46 +5486,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec xvst $xr0, $s0, 1952 xvst $xr0, $fp, 1920 xvst $xr0, $fp, 1952 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vldi $vr0, -2688 vst $vr0, $s0, 1984 ori $a0, $zero, 1000 - st.h $a0, $sp, 118 + st.h $a0, $sp, 102 ld.d $a0, $s2, 16 vst 
$vr0, $fp, 1984
 st.h $s4, $fp, 0
- st.d $fp, $sp, 128
- st.d $s0, $sp, 120
+ st.d $fp, $sp, 112
+ st.d $s0, $sp, 104
 beqz $a0, .LBB3_66
 # %bb.31:
 ld.d $a4, $s2, 24
 .Ltmp618: # EH_LABEL
- addi.d $a1, $sp, 128
- addi.d $a2, $sp, 120
- addi.d $a3, $sp, 118
+ addi.d $a1, $sp, 112
+ addi.d $a2, $sp, 104
+ addi.d $a3, $sp, 102
 move $a0, $s2
 jirl $ra, $a4, 0
 .Ltmp619: # EH_LABEL
 # %bb.32:
 move $s3, $a0
 ld.d $a0, $s1, 16
- st.d $fp, $sp, 128
- st.d $s0, $sp, 120
+ st.d $fp, $sp, 112
+ st.d $s0, $sp, 104
 ori $a1, $zero, 1000
- st.h $a1, $sp, 118
+ st.h $a1, $sp, 102
 beqz $a0, .LBB3_68
 # %bb.33:
 ld.d $a4, $s1, 24
 .Ltmp620: # EH_LABEL
- addi.d $a1, $sp, 128
- addi.d $a2, $sp, 120
- addi.d $a3, $sp, 118
+ addi.d $a1, $sp, 112
+ addi.d $a2, $sp, 104
+ addi.d $a3, $sp, 102
 move $a0, $s1
 jirl $ra, $a4, 0
 .Ltmp621: # EH_LABEL
 # %bb.34:
 bne $s3, $a0, .LBB3_70
 # %bb.35: # %vector.body305
- xvld $xr0, $sp, 80 # 32-byte Folded Reload
+ xvld $xr0, $sp, 64 # 32-byte Folded Reload
 xvst $xr0, $s0, 0
 xvst $xr0, $s0, 32
 xvst $xr0, $fp, 0
@@ -5656,46 +5650,46 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec
 xvst $xr0, $s0, 1952
 xvst $xr0, $fp, 1920
 xvst $xr0, $fp, 1952
- vld $vr0, $sp, 64 # 16-byte Folded Reload
+ vldi $vr0, -2688
 vst $vr0, $s0, 1984
 ori $a0, $zero, 1000
- st.h $a0, $sp, 118
+ st.h $a0, $sp, 102
 ld.d $a0, $s2, 16
 vst $vr0, $fp, 1984
 st.h $s4, $fp, 1998
- st.d $fp, $sp, 128
- st.d $s0, $sp, 120
+ st.d $fp, $sp, 112
+ st.d $s0, $sp, 104
 beqz $a0, .LBB3_71
 # %bb.36:
 ld.d $a4, $s2, 24
 .Ltmp624: # EH_LABEL
- addi.d $a1, $sp, 128
- addi.d $a2, $sp, 120
- addi.d $a3, $sp, 118
+ addi.d $a1, $sp, 112
+ addi.d $a2, $sp, 104
+ addi.d $a3, $sp, 102
 move $a0, $s2
 jirl $ra, $a4, 0
 .Ltmp625: # EH_LABEL
 # %bb.37:
 move $s3, $a0
 ld.d $a0, $s1, 16
- st.d $fp, $sp, 128
- st.d $s0, $sp, 120
+ st.d $fp, $sp, 112
+ st.d $s0, $sp, 104
 ori $a1, $zero, 1000
- st.h $a1, $sp, 118
+ st.h $a1, $sp, 102
 beqz $a0, .LBB3_73
 # %bb.38:
 ld.d $a4, $s1, 24
 .Ltmp626: # EH_LABEL
- addi.d $a1, $sp, 128
- addi.d $a2, $sp, 120
- addi.d $a3, $sp, 118
+ addi.d $a1, $sp, 112
+ addi.d $a2, $sp, 104
+ addi.d $a3, $sp, 102
 move $a0, $s1
 jirl $ra, $a4, 0
 .Ltmp627: # EH_LABEL
 # %bb.39:
 bne $s3, $a0, .LBB3_75
 # %bb.40: # %vector.body322
- xvld $xr0, $sp, 80 # 32-byte Folded Reload
+ xvld $xr0, $sp, 64 # 32-byte Folded Reload
 xvst $xr0, $s0, 0
 xvst $xr0, $s0, 32
 xvst $xr0, $fp, 0
@@ -5819,41 +5813,41 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec
 xvst $xr0, $s0, 1920
 xvst $xr0, $s0, 1952
 xvst $xr0, $fp, 1920
- vld $vr1, $sp, 64 # 16-byte Folded Reload
- vst $vr1, $fp, 1984
+ xvst $xr0, $fp, 1952
+ vldi $vr0, -2688
+ vst $vr0, $fp, 1984
 st.h $s4, $fp, 1998
 st.h $s4, $fp, 0
 ori $a0, $zero, 1000
- st.h $a0, $sp, 118
- ld.d $a0, $s2, 16
- xvst $xr0, $fp, 1952
- vst $vr1, $s0, 1984
- st.d $fp, $sp, 128
- st.d $s0, $sp, 120
- beqz $a0, .LBB3_76
+ ld.d $a1, $s2, 16
+ st.h $a0, $sp, 102
+ vst $vr0, $s0, 1984
+ st.d $fp, $sp, 112
+ st.d $s0, $sp, 104
+ beqz $a1, .LBB3_76
 # %bb.41:
 ld.d $a4, $s2, 24
 .Ltmp630: # EH_LABEL
- addi.d $a1, $sp, 128
- addi.d $a2, $sp, 120
- addi.d $a3, $sp, 118
+ addi.d $a1, $sp, 112
+ addi.d $a2, $sp, 104
+ addi.d $a3, $sp, 102
 move $a0, $s2
 jirl $ra, $a4, 0
 .Ltmp631: # EH_LABEL
 # %bb.42:
 move $s2, $a0
 ld.d $a0, $s1, 16
- st.d $fp, $sp, 128
- st.d $s0, $sp, 120
+ st.d $fp, $sp, 112
+ st.d $s0, $sp, 104
 ori $a1, $zero, 1000
- st.h $a1, $sp, 118
+ st.h $a1, $sp, 102
 beqz $a0, .LBB3_78
 # %bb.43:
 ld.d $a4, $s1, 24
 .Ltmp632: # EH_LABEL
- addi.d $a1, $sp, 128
- addi.d $a2, $sp, 120
- addi.d $a3, $sp, 118
+ addi.d $a1, $sp, 112
+ addi.d $a2, $sp, 104
+ addi.d $a3, $sp, 102
 move $a0, $s1
 jirl $ra, $a4, 0
 .Ltmp633: # EH_LABEL
@@ -5866,16 +5860,16 @@ _ZL19checkVectorFunctionIssEvSt8functionIFT_PT0_S3_S1_EES5_PKc: # @_ZL19checkVec
 move $a0, $fp
 pcaddu18i $ra, %call36(_ZdaPv)
 jirl $ra, $ra, 0
- ld.d $s6, $sp, 136 # 8-byte Folded Reload
- ld.d $s5, $sp, 144 # 8-byte Folded Reload
- ld.d $s4, $sp, 152 # 8-byte Folded Reload
- ld.d $s3, $sp, 160 # 8-byte Folded Reload
- ld.d $s2, $sp, 168 # 8-byte Folded Reload
- ld.d $s1, $sp, 176 # 8-byte Folded Reload
- ld.d $s0, $sp, 184 # 8-byte Folded Reload
- ld.d $fp, $sp, 192 # 8-byte Folded Reload
- ld.d $ra, $sp, 200 # 8-byte Folded Reload
- addi.d $sp, $sp, 208
+ ld.d $s6, $sp, 120 # 8-byte Folded Reload
+ ld.d $s5, $sp, 128 # 8-byte Folded Reload
+ ld.d $s4, $sp, 136 # 8-byte Folded Reload
+ ld.d $s3, $sp, 144 # 8-byte Folded Reload
+ ld.d $s2, $sp, 152 # 8-byte Folded Reload
+ ld.d $s1, $sp, 160 # 8-byte Folded Reload
+ ld.d $s0, $sp, 168 # 8-byte Folded Reload
+ ld.d $fp, $sp, 176 # 8-byte Folded Reload
+ ld.d $ra, $sp, 184 # 8-byte Folded Reload
+ addi.d $sp, $sp, 192
 ret
 .LBB3_46:
 .Ltmp675: # EH_LABEL
@@ -6639,7 +6633,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE3$_1E9_M_invokeERKSt9_Any_dataOS0_S7_Oi
 pcalau12i $a5, %pc_hi20(.LCPI9_0)
 xvld $xr0, $a5, %pc_lo12(.LCPI9_0)
 addi.d $a5, $a3, 32
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $a6, $a1, 32
 move $a7, $a2
 xvori.b $xr2, $xr1, 0
@@ -7198,7 +7192,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE3$_1E9_M_invokeERKSt9_Any_dataOS0_S7_Oi
 pcalau12i $a5, %pc_hi20(.LCPI14_0)
 xvld $xr0, $a5, %pc_lo12(.LCPI14_0)
 addi.d $a5, $a3, 32
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $a6, $a1, 32
 move $a7, $a2
 xvori.b $xr2, $xr1, 0
@@ -7546,12 +7540,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE3$_3E9_M_invokeERKSt9_Any_dataOS0_S7_Os
 slli.d $a3, $a3, 5
 addi.d $a6, $a2, 32
 pcalau12i $a7, %pc_hi20(.LCPI19_0)
- xvld $xr1, $a7, %pc_lo12(.LCPI19_0)
- xvreplgr2vr.h $xr0, $a4
+ xvld $xr0, $a7, %pc_lo12(.LCPI19_0)
 addi.d $a7, $a1, 32
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t0, $a3
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB19_7: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -7559,18 +7553,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE3$_3E9_M_invokeERKSt9_Any_dataOS0_S7_Os
 xvld $xr5, $a7, 0
 xvld $xr6, $a6, -32
 xvld $xr7, $a6, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr0, $xr1, $xr4
+ xvbitsel.v $xr1, $xr1, $xr0, $xr4
 xvbitsel.v $xr3, $xr3, $xr8, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t0, $t0, -32
 addi.d $a6, $a6, 64
 addi.d $a7, $a7, 64
 bnez $t0, .LBB19_7
 # %bb.8: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
@@ -7791,7 +7785,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE3$_5E9_M_invokeERKSt9_Any_dataOS0_S7_Oi
 pcalau12i $a5, %pc_hi20(.LCPI23_0)
 xvld $xr0, $a5, %pc_lo12(.LCPI23_0)
 addi.d $a5, $a3, 32
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $a6, $a1, 32
 move $a7, $a2
 xvori.b $xr2, $xr1, 0
@@ -8029,7 +8023,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE3$_5E9_M_invokeERKSt9_Any_dataOS0_S7_Oi
 pcalau12i $a5, %pc_hi20(.LCPI27_0)
 xvld $xr0, $a5, %pc_lo12(.LCPI27_0)
 addi.d $a5, $a3, 32
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $a6, $a1, 32
 move $a7, $a2
 xvori.b $xr2, $xr1, 0
@@ -8283,12 +8277,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE3$_7E9_M_invokeERKSt9_Any_dataOS0_S7_Os
 slli.d $a3, $a3, 5
 addi.d $a6, $a2, 32
 pcalau12i $a7, %pc_hi20(.LCPI31_0)
- xvld $xr1, $a7, %pc_lo12(.LCPI31_0)
- xvreplgr2vr.h $xr0, $a4
+ xvld $xr0, $a7, %pc_lo12(.LCPI31_0)
 addi.d $a7, $a1, 32
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t0, $a3
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB31_7: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -8296,18 +8290,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE3$_7E9_M_invokeERKSt9_Any_dataOS0_S7_Os
 xvld $xr5, $a7, 0
 xvld $xr6, $a6, -32
 xvld $xr7, $a6, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr1, $xr0, $xr4
+ xvbitsel.v $xr1, $xr0, $xr1, $xr4
 xvbitsel.v $xr3, $xr8, $xr3, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t0, $t0, -32
 addi.d $a6, $a6, 64
 addi.d $a7, $a7, 64
 bnez $t0, .LBB31_7
 # %bb.8: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
@@ -8525,7 +8519,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE3$_9E9_M_invokeERKSt9_Any_dataOS0_S7_Oi
 pcalau12i $a5, %pc_hi20(.LCPI35_0)
 xvld $xr0, $a5, %pc_lo12(.LCPI35_0)
 addi.d $a5, $a2, 32
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $a6, $a3, 32
 move $a7, $a1
 xvori.b $xr2, $xr1, 0
@@ -8758,7 +8752,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE3$_9E9_M_invokeERKSt9_Any_dataOS0_S7_Oi
 pcalau12i $a5, %pc_hi20(.LCPI39_0)
 xvld $xr0, $a5, %pc_lo12(.LCPI39_0)
 addi.d $a5, $a2, 32
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $a6, $a3, 32
 move $a7, $a1
 xvori.b $xr2, $xr1, 0
@@ -9004,12 +8998,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_S7_O
 slli.d $a4, $a4, 5
 addi.d $a6, $a2, 32
 pcalau12i $a7, %pc_hi20(.LCPI43_0)
- xvld $xr1, $a7, %pc_lo12(.LCPI43_0)
- xvreplgr2vr.h $xr0, $a5
+ xvld $xr0, $a7, %pc_lo12(.LCPI43_0)
 addi.d $a7, $a3, 32
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t0, $a4
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB43_6: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -9017,18 +9011,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_S7_O
 xvld $xr5, $a7, 0
 xvld $xr6, $a6, -32
 xvld $xr7, $a6, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr0, $xr1, $xr4
+ xvbitsel.v $xr1, $xr1, $xr0, $xr4
 xvbitsel.v $xr3, $xr3, $xr8, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t0, $t0, -32
 addi.d $a6, $a6, 64
 addi.d $a7, $a7, 64
 bnez $t0, .LBB43_6
 # %bb.7: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
@@ -9967,7 +9961,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE4$_21E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI71_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI71_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -10219,7 +10213,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE4$_21E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI75_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI75_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -10487,12 +10481,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_S7_O
 bstrins.d $a7, $t1, 4, 0
 addi.d $t1, $a1, 38
 pcalau12i $t2, %pc_hi20(.LCPI79_0)
- xvld $xr1, $t2, %pc_lo12(.LCPI79_0)
- xvreplgr2vr.h $xr0, $a4
+ xvld $xr0, $t2, %pc_lo12(.LCPI79_0)
 addi.d $t2, $a2, 38
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t3, $a5
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB79_7: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -10500,18 +10494,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_S7_O
 xvld $xr5, $t1, 0
 xvld $xr6, $t2, -32
 xvld $xr7, $t2, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr0, $xr1, $xr4
+ xvbitsel.v $xr1, $xr1, $xr0, $xr4
 xvbitsel.v $xr3, $xr3, $xr8, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t1, $t1, 64
 addi.d $t3, $t3, -32
 addi.d $t2, $t2, 64
 bnez $t3, .LBB79_7
 # %bb.8: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
@@ -10740,7 +10734,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI83_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI83_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -10985,7 +10979,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI87_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI87_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -11242,12 +11236,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_27E9_M_invokeERKSt9_Any_dataOS0_S7_O
 bstrins.d $a7, $t0, 4, 0
 addi.d $t0, $a1, 38
 pcalau12i $t1, %pc_hi20(.LCPI91_0)
- xvld $xr1, $t1, %pc_lo12(.LCPI91_0)
- xvreplgr2vr.h $xr0, $a4
+ xvld $xr0, $t1, %pc_lo12(.LCPI91_0)
 addi.d $t1, $a2, 38
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t2, $a5
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB91_5: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -11255,18 +11249,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_27E9_M_invokeERKSt9_Any_dataOS0_S7_O
 xvld $xr5, $t0, 0
 xvld $xr6, $t1, -32
 xvld $xr7, $t1, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr0, $xr1, $xr4
+ xvbitsel.v $xr1, $xr1, $xr0, $xr4
 xvbitsel.v $xr3, $xr3, $xr8, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t0, $t0, 64
 addi.d $t2, $t2, -32
 addi.d $t1, $t1, 64
 bnez $t2, .LBB91_5
 # %bb.6: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
@@ -11497,7 +11491,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE4$_29E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI95_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI95_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -11744,7 +11738,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE4$_29E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI99_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI99_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -12003,12 +11997,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_31E9_M_invokeERKSt9_Any_dataOS0_S7_O
 bstrins.d $a7, $t0, 4, 0
 addi.d $t0, $a1, 38
 pcalau12i $t1, %pc_hi20(.LCPI103_0)
- xvld $xr1, $t1, %pc_lo12(.LCPI103_0)
- xvreplgr2vr.h $xr0, $a4
+ xvld $xr0, $t1, %pc_lo12(.LCPI103_0)
 addi.d $t1, $a2, 38
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t2, $a5
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB103_6: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -12016,18 +12010,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_31E9_M_invokeERKSt9_Any_dataOS0_S7_O
 xvld $xr5, $t0, 0
 xvld $xr6, $t1, -32
 xvld $xr7, $t1, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr0, $xr1, $xr4
+ xvbitsel.v $xr1, $xr1, $xr0, $xr4
 xvbitsel.v $xr3, $xr3, $xr8, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t0, $t0, 64
 addi.d $t2, $t2, -32
 addi.d $t1, $t1, 64
 bnez $t2, .LBB103_6
 # %bb.7: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
@@ -12254,7 +12248,7 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE4$_33E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI107_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI107_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -12497,7 +12491,7 @@ _ZNSt17_Function_handlerIFiPfS0_iEZ4mainE4$_33E9_M_invokeERKSt9_Any_dataOS0_S7_O
 pcalau12i $a6, %pc_hi20(.LCPI111_0)
 xvld $xr0, $a6, %pc_lo12(.LCPI111_0)
 addi.d $a6, $a1, 44
- xvreplgr2vr.w $xr1, $a4
+ xvldi $xr1, -3200
 addi.d $t0, $a2, 44
 move $t1, $a5
 xvori.b $xr2, $xr1, 0
@@ -12754,12 +12748,12 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_35E9_M_invokeERKSt9_Any_dataOS0_S7_O
 bstrins.d $a7, $t0, 4, 0
 addi.d $t0, $a1, 38
 pcalau12i $t1, %pc_hi20(.LCPI115_0)
- xvld $xr1, $t1, %pc_lo12(.LCPI115_0)
- xvreplgr2vr.h $xr0, $a4
+ xvld $xr0, $t1, %pc_lo12(.LCPI115_0)
 addi.d $t1, $a2, 38
+ xvldi $xr1, -2688
 xvrepli.h $xr2, 32
 move $t2, $a5
- xvori.b $xr3, $xr0, 0
+ xvori.b $xr3, $xr1, 0
 .p2align 4, , 16
 .LBB115_6: # %vector.body
 # =>This Inner Loop Header: Depth=1
@@ -12767,18 +12761,18 @@ _ZNSt17_Function_handlerIFsPsS0_sEZ4mainE4$_35E9_M_invokeERKSt9_Any_dataOS0_S7_O
 xvld $xr5, $t0, 0
 xvld $xr6, $t1, -32
 xvld $xr7, $t1, 0
- xvaddi.hu $xr8, $xr1, 16
+ xvaddi.hu $xr8, $xr0, 16
 xvslt.h $xr4, $xr6, $xr4
 xvslt.h $xr5, $xr7, $xr5
- xvbitsel.v $xr0, $xr0, $xr1, $xr4
+ xvbitsel.v $xr1, $xr1, $xr0, $xr4
 xvbitsel.v $xr3, $xr3, $xr8, $xr5
- xvadd.h $xr1, $xr1, $xr2
+ xvadd.h $xr0, $xr0, $xr2
 addi.d $t0, $t0, 64
 addi.d $t2, $t2, -32
 addi.d $t1, $t1, 64
 bnez $t2, .LBB115_6
 # %bb.7: # %middle.block
- xvmax.h $xr0, $xr0, $xr3
+ xvmax.h $xr0, $xr1, $xr3
 xvpermi.q $xr1, $xr0, 1
 vmax.h $vr0, $vr0, $vr1
 vbsrl.v $vr1, $vr0, 8
diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s
index af141d27..6780d595 100644
--- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s
+++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s
@@ -2288,8 +2288,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun
 jirl $ra, $a3, 0
 .Ltmp279: # EH_LABEL
 .LBB1_52: # %vector.body1258
- lu12i.w $a0, 2048
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -3456
 xvst $xr0, $fp, 0
 xvst $xr0, $fp, 32
 xvst $xr0, $fp, 64
@@ -2967,8 +2966,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun
 .LBB1_100: # %.preheader715
 move $s6, $zero
 addi.d $s7, $fp, 4
- lu12i.w $a0, -264192
- xvreplgr2vr.w $xr0, $a0
+ xvldi $xr0, -1296
 xvst $xr0, $sp, 240 # 32-byte Folded Spill
 lu12i.w $s3, -524288
 lu32i.d $s3, 0
@@ -7478,9 +7476,8 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_0E10_M_managerERSt9_Any_dataRKS4_St18_M
 .type _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_1E9_M_invokeERKSt9_Any_dataOS0_Oj,@function
 _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_1E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_1E9_M_invokeERKSt9_Any_dataOS0_Oj"
 # %bb.0: # %vector.ph
- lu12i.w $a0, -262144
- xvreplgr2vr.w $xr0, $a0
 ld.d $a0, $a1, 0
+ xvldi $xr0, -3136
 lu12i.w $a1, -2
 ori $a6, $a1, 4032
 lu12i.w $a1, 1
@@ -7698,9 +7695,8 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_2E10_M_managerERSt9_Any_dataRKS4_St18_M
 .type _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_3E9_M_invokeERKSt9_Any_dataOS0_Oj,@function
 _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_3E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_3E9_M_invokeERKSt9_Any_dataOS0_Oj"
 # %bb.0: # %vector.ph
- lu12i.w $a0, 2048
- xvreplgr2vr.w $xr0, $a0
 ld.d $a0, $a1, 0
+ xvldi $xr0, -3456
 lu12i.w $a1, -2
 ori $a6, $a1, 4032
 lu12i.w $a1, 1
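Note for reviewers reading the raw output above. The recurring substitution in these hunks is the same everywhere: a splat constant that used to be built in a GPR (lu12i.w/lu52i.d) and broadcast with xvreplgr2vr.{h,w,d}, or spilled and reloaded from the stack (the "Folded Reload" vld/xvld lines), is now synthesized by a single vldi/xvldi whose signed 13-bit immediate encodes the splat pattern. The stack-offset churn (118/120/128 becoming 102/104/112, and addi.d $sp, $sp, 208 becoming 192) is consistent with the constant's spill slot disappearing once the value can be rematerialized by an immediate. The encoding arithmetic is checkable from the hunks themselves: lu12i.w $a0, -264192 leaves 0xBF800000 in $a0, the bit pattern of -1.0f, so xvldi $xr0, -1296 is the same splat in one instruction; likewise lu12i.w $a0, 2048 gives 0x00800000 (FLT_MIN's pattern), which becomes xvldi $xr0, -3456.

Below is a minimal C sketch of the kind of loop that needs such a seed splat. The function name and the -1.0f seed are illustrative, chosen to match the 0xBF800000 hunk; this is not code quoted from the test suite, just the shape of source that can produce this codegen under LASX autovectorization:

    /* fmax-style reduction: the vectorizer splats the seed -1.0f across
     * the LASX lanes before the loop; with this change that splat can be
     * emitted as one `xvldi $xr0, -1296` instead of the two-instruction
     * lu12i.w + xvreplgr2vr.w sequence. */
    #include <stddef.h>

    float max_from_minus_one(const float *a, size_t n) {
      float m = -1.0f;             /* seed constant -> vector splat */
      for (size_t i = 0; i < n; ++i)
        m = a[i] > m ? a[i] : m;   /* lane-wise max, reduced after the loop */
      return m;
    }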